GPU: Handle VRAM wrap-around behavior on scanout
This commit is contained in:
parent
fc3efebb38
commit
9e024b7a51
|
@ -487,22 +487,19 @@ void GPU::UpdateCRTCDisplayParameters()
|
|||
|
||||
if (horizontal_display_end <= horizontal_visible_end_tick)
|
||||
{
|
||||
cs.display_vram_width = std::min<u16>(
|
||||
cs.display_vram_width =
|
||||
std::max<u16>((((horizontal_display_end - std::max(horizontal_display_start, horizontal_visible_start_tick)) +
|
||||
(cs.dot_clock_divider - 1)) /
|
||||
cs.dot_clock_divider),
|
||||
1u),
|
||||
VRAM_WIDTH - cs.display_vram_left);
|
||||
1u);
|
||||
}
|
||||
else
|
||||
{
|
||||
cs.display_vram_width = std::min<u16>(
|
||||
std::max<u16>(
|
||||
(((horizontal_visible_end_tick - std::max(horizontal_display_start, horizontal_visible_start_tick)) +
|
||||
(cs.dot_clock_divider - 1)) /
|
||||
cs.dot_clock_divider),
|
||||
1u),
|
||||
VRAM_WIDTH - cs.display_vram_left);
|
||||
cs.display_vram_width = std::max<u16>(
|
||||
(((horizontal_visible_end_tick - std::max(horizontal_display_start, horizontal_visible_start_tick)) +
|
||||
(cs.dot_clock_divider - 1)) /
|
||||
cs.dot_clock_divider),
|
||||
1u);
|
||||
}
|
||||
|
||||
if (vertical_display_start >= vertical_visible_start_line)
|
||||
|
@ -513,21 +510,19 @@ void GPU::UpdateCRTCDisplayParameters()
|
|||
else
|
||||
{
|
||||
cs.display_origin_top = 0;
|
||||
cs.display_vram_top = std::min<u16>(
|
||||
m_crtc_state.regs.Y + ((vertical_visible_start_line - vertical_display_start) << height_shift), VRAM_HEIGHT - 1);
|
||||
cs.display_vram_top =
|
||||
m_crtc_state.regs.Y + ((vertical_visible_start_line - vertical_display_start) << height_shift);
|
||||
}
|
||||
|
||||
if (vertical_display_end <= vertical_visible_end_line)
|
||||
{
|
||||
cs.display_vram_height = std::min<u16>(
|
||||
(vertical_display_end - std::max(vertical_display_start, vertical_visible_start_line)) << height_shift,
|
||||
VRAM_HEIGHT - cs.display_vram_top);
|
||||
cs.display_vram_height = (vertical_display_end - std::max(vertical_display_start, vertical_visible_start_line))
|
||||
<< height_shift;
|
||||
}
|
||||
else
|
||||
{
|
||||
cs.display_vram_height = std::min<u16>(
|
||||
(vertical_visible_end_line - std::max(vertical_display_start, vertical_visible_start_line)) << height_shift,
|
||||
VRAM_HEIGHT - cs.display_vram_top);
|
||||
cs.display_vram_height = (vertical_visible_end_line - std::max(vertical_display_start, vertical_visible_start_line))
|
||||
<< height_shift;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -553,7 +553,9 @@ void GPU_HW_D3D11::UpdateDisplay()
|
|||
{
|
||||
m_host_display->ClearDisplayTexture();
|
||||
}
|
||||
else if (!m_GPUSTAT.display_area_color_depth_24 && !interlaced)
|
||||
else if (!m_GPUSTAT.display_area_color_depth_24 && !interlaced &&
|
||||
(scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() &&
|
||||
(scaled_vram_offset_y + scaled_vram_offset_y <= m_vram_texture.GetHeight()))
|
||||
{
|
||||
m_host_display->SetDisplayTexture(m_vram_texture.GetD3DSRV(), m_vram_texture.GetWidth(),
|
||||
m_vram_texture.GetHeight(), scaled_vram_offset_x, scaled_vram_offset_y,
|
||||
|
@ -567,15 +569,15 @@ void GPU_HW_D3D11::UpdateDisplay()
|
|||
const u32 reinterpret_field_offset = GetInterlacedField();
|
||||
const u32 reinterpret_start_x = m_crtc_state.regs.X * m_resolution_scale;
|
||||
const u32 reinterpret_width = scaled_display_width + (m_crtc_state.display_vram_left - m_crtc_state.regs.X);
|
||||
const u32 uniforms[4] = {reinterpret_field_offset, reinterpret_start_x};
|
||||
const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y, reinterpret_field_offset};
|
||||
ID3D11PixelShader* display_pixel_shader =
|
||||
m_display_pixel_shaders[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][BoolToUInt8(interlaced)].Get();
|
||||
|
||||
SetViewportAndScissor(reinterpret_start_x, scaled_vram_offset_y, reinterpret_width, scaled_display_height);
|
||||
SetViewportAndScissor(0, reinterpret_field_offset, reinterpret_width, scaled_display_height);
|
||||
DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms));
|
||||
|
||||
m_host_display->SetDisplayTexture(m_display_texture.GetD3DSRV(), m_display_texture.GetWidth(),
|
||||
m_display_texture.GetHeight(), scaled_vram_offset_x, scaled_vram_offset_y,
|
||||
m_display_texture.GetHeight(), scaled_vram_offset_x - reinterpret_start_x, 0,
|
||||
scaled_display_width, scaled_display_height);
|
||||
|
||||
RestoreGraphicsAPIState();
|
||||
|
|
|
@ -506,7 +506,9 @@ void GPU_HW_OpenGL::UpdateDisplay()
|
|||
{
|
||||
m_host_display->ClearDisplayTexture();
|
||||
}
|
||||
else if (!m_GPUSTAT.display_area_color_depth_24 && !interlaced)
|
||||
else if (!m_GPUSTAT.display_area_color_depth_24 && !interlaced &&
|
||||
(scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() &&
|
||||
(scaled_vram_offset_y + scaled_vram_offset_y <= m_vram_texture.GetHeight()))
|
||||
{
|
||||
m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_vram_texture.GetGLId())),
|
||||
m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), scaled_vram_offset_x,
|
||||
|
@ -525,20 +527,19 @@ void GPU_HW_OpenGL::UpdateDisplay()
|
|||
const u32 flipped_vram_offset_y = VRAM_HEIGHT - vram_offset_y - display_height;
|
||||
const u32 scaled_flipped_vram_offset_y =
|
||||
m_vram_texture.GetHeight() - scaled_vram_offset_y - scaled_display_height;
|
||||
|
||||
const u32 reinterpret_field_offset = GetInterlacedField();
|
||||
const u32 reinterpret_start_x = m_crtc_state.regs.X * m_resolution_scale;
|
||||
const u32 reinterpret_width = scaled_display_width + (m_crtc_state.display_vram_left - m_crtc_state.regs.X);
|
||||
const u32 uniforms[4] = {reinterpret_field_offset, reinterpret_start_x};
|
||||
const u32 uniforms[4] = {reinterpret_start_x, scaled_flipped_vram_offset_y, reinterpret_field_offset};
|
||||
UploadUniformBlock(uniforms, sizeof(uniforms));
|
||||
m_batch_ubo_dirty = true;
|
||||
|
||||
glViewport(reinterpret_start_x, scaled_flipped_vram_offset_y, reinterpret_width, scaled_display_height);
|
||||
glViewport(0, reinterpret_field_offset, reinterpret_width, scaled_display_height);
|
||||
glDrawArrays(GL_TRIANGLES, 0, 3);
|
||||
|
||||
m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_display_texture.GetGLId())),
|
||||
m_display_texture.GetWidth(), m_display_texture.GetHeight(),
|
||||
scaled_vram_offset_x, m_vram_texture.GetHeight() - scaled_vram_offset_y,
|
||||
scaled_vram_offset_x - reinterpret_start_x, scaled_display_height,
|
||||
scaled_display_width, -static_cast<s32>(scaled_display_height));
|
||||
|
||||
// restore state
|
||||
|
|
|
@ -770,37 +770,39 @@ std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, bo
|
|||
DefineMacro(ss, "INTERLACED", interlaced);
|
||||
|
||||
WriteCommonFunctions(ss);
|
||||
DeclareUniformBuffer(ss, {"int u_field_offset", "int u_vram_start_x"});
|
||||
DeclareUniformBuffer(ss, {"uint2 u_vram_offset", "uint u_field_offset"});
|
||||
DeclareTexture(ss, "samp0", 0);
|
||||
|
||||
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, false);
|
||||
ss << R"(
|
||||
{
|
||||
int2 icoords = int2(v_pos.xy);
|
||||
uint2 icoords = uint2(v_pos.xy) + u_vram_offset;
|
||||
|
||||
#if INTERLACED
|
||||
if (((fixYCoord(icoords.y) / RESOLUTION_SCALE) & 1) != u_field_offset)
|
||||
if (((icoords.y / uint(RESOLUTION_SCALE)) & 1u) != u_field_offset)
|
||||
discard;
|
||||
#endif
|
||||
|
||||
//icoords.y = uint(fixYCoord(int(icoords.y)));
|
||||
|
||||
#if DEPTH_24BIT
|
||||
// relative to start of scanout
|
||||
int relative_x = (icoords.x - u_vram_start_x) / RESOLUTION_SCALE;
|
||||
icoords.x = u_vram_start_x + ((relative_x * 3) / 2) * RESOLUTION_SCALE;
|
||||
uint relative_x = (icoords.x - u_vram_offset.x) / uint(RESOLUTION_SCALE);
|
||||
icoords.x = u_vram_offset.x + ((relative_x * 3u) / 2u) * uint(RESOLUTION_SCALE);
|
||||
|
||||
// load adjacent 16-bit texels
|
||||
uint s0 = RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, icoords, 0));
|
||||
uint s1 = RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, icoords + int2(RESOLUTION_SCALE, 0), 0));
|
||||
uint s0 = RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, int2(icoords % uint2(VRAM_SIZE)), 0));
|
||||
uint s1 = RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, int2((icoords + uint2(uint(RESOLUTION_SCALE), 0)) % uint2(VRAM_SIZE)), 0));
|
||||
|
||||
// select which part of the combined 16-bit texels we are currently shading
|
||||
uint s1s0 = ((s1 << 16) | s0) >> ((relative_x & 1) * 8);
|
||||
uint s1s0 = ((s1 << 16) | s0) >> ((relative_x & 1u) * 8u);
|
||||
|
||||
// extract components and normalize
|
||||
o_col0 = float4(float(s1s0 & 0xFFu) / 255.0, float((s1s0 >> 8u) & 0xFFu) / 255.0,
|
||||
float((s1s0 >> 16u) & 0xFFu) / 255.0, 1.0);
|
||||
#else
|
||||
// load and return
|
||||
o_col0 = LOAD_TEXTURE(samp0, icoords, 0);
|
||||
o_col0 = LOAD_TEXTURE(samp0, int2(icoords % uint2(VRAM_SIZE)), 0);
|
||||
#endif
|
||||
}
|
||||
)";
|
||||
|
|
|
@ -41,38 +41,99 @@ void GPU_SW::Reset()
|
|||
m_vram.fill(0);
|
||||
}
|
||||
|
||||
void GPU_SW::CopyOut15Bit(const u16* src_ptr, u32 src_stride, u32* dst_ptr, u32 dst_stride, u32 width, u32 height)
|
||||
void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u32 width, u32 height, bool interlaced)
|
||||
{
|
||||
for (u32 row = 0; row < height; row++)
|
||||
{
|
||||
const u16* src_row_ptr = src_ptr;
|
||||
u32* dst_row_ptr = dst_ptr;
|
||||
for (u32 col = 0; col < width; col++)
|
||||
*(dst_row_ptr++) = RGBA5551ToRGBA8888(*(src_row_ptr++));
|
||||
const u8 interlaced_shift = BoolToUInt8(interlaced);
|
||||
|
||||
src_ptr += src_stride;
|
||||
dst_ptr += dst_stride;
|
||||
// Fast path when not wrapping around.
|
||||
if ((src_x + width) <= VRAM_WIDTH && (src_y + height) <= VRAM_HEIGHT)
|
||||
{
|
||||
dst_stride <<= interlaced_shift;
|
||||
height >>= interlaced_shift;
|
||||
|
||||
const u16* src_ptr = &m_vram[src_y * VRAM_WIDTH + src_x];
|
||||
const u32 src_stride = VRAM_WIDTH << interlaced_shift;
|
||||
for (u32 row = 0; row < height; row++)
|
||||
{
|
||||
const u16* src_row_ptr = src_ptr;
|
||||
u32* dst_row_ptr = dst_ptr;
|
||||
for (u32 col = 0; col < width; col++)
|
||||
*(dst_row_ptr++) = RGBA5551ToRGBA8888(*(src_row_ptr++));
|
||||
|
||||
src_ptr += src_stride;
|
||||
dst_ptr += dst_stride;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
dst_stride <<= interlaced_shift;
|
||||
height >>= interlaced_shift;
|
||||
|
||||
const u32 end_x = src_x + width;
|
||||
for (u32 row = 0; row < height; row++)
|
||||
{
|
||||
const u16* src_row_ptr = &m_vram[(src_y % VRAM_HEIGHT) * VRAM_WIDTH];
|
||||
u32* dst_row_ptr = dst_ptr;
|
||||
|
||||
for (u32 col = src_x; col < end_x; col++)
|
||||
*(dst_row_ptr++) = RGBA5551ToRGBA8888(src_row_ptr[col % VRAM_WIDTH]);
|
||||
|
||||
src_y += (1 << interlaced_shift);
|
||||
dst_ptr += dst_stride;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GPU_SW::CopyOut24Bit(const u16* src_ptr, u32 src_stride, u32* dst_ptr, u32 dst_stride, u32 width, u32 height)
|
||||
void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u32 width, u32 height, bool interlaced)
|
||||
{
|
||||
for (u32 row = 0; row < height; row++)
|
||||
const u8 interlaced_shift = BoolToUInt8(interlaced);
|
||||
|
||||
if ((src_x + width) <= VRAM_WIDTH && (src_y + height) <= VRAM_HEIGHT)
|
||||
{
|
||||
const u8* src_row_ptr = reinterpret_cast<const u8*>(src_ptr);
|
||||
u32* dst_row_ptr = dst_ptr;
|
||||
dst_stride <<= interlaced_shift;
|
||||
height >>= interlaced_shift;
|
||||
|
||||
// Beware unaligned accesses.
|
||||
for (u32 col = 0; col < width; col++)
|
||||
const u8* src_ptr = reinterpret_cast<const u8*>(&m_vram[src_y * VRAM_WIDTH + src_x]);
|
||||
const u32 src_stride = (VRAM_WIDTH << interlaced_shift) * sizeof(u16);
|
||||
for (u32 row = 0; row < height; row++)
|
||||
{
|
||||
// This will fill the alpha channel with junk, but that's okay since we don't use it
|
||||
std::memcpy(dst_row_ptr, src_row_ptr, sizeof(u32));
|
||||
src_row_ptr += 3;
|
||||
dst_row_ptr++;
|
||||
}
|
||||
const u8* src_row_ptr = src_ptr;
|
||||
u8* dst_row_ptr = reinterpret_cast<u8*>(dst_ptr);
|
||||
for (u32 col = 0; col < width; col++)
|
||||
{
|
||||
*(dst_row_ptr++) = *(src_row_ptr++);
|
||||
*(dst_row_ptr++) = *(src_row_ptr++);
|
||||
*(dst_row_ptr++) = *(src_row_ptr++);
|
||||
*(dst_row_ptr++) = 0xFF;
|
||||
}
|
||||
|
||||
src_ptr += src_stride;
|
||||
dst_ptr += dst_stride;
|
||||
src_ptr += src_stride;
|
||||
dst_ptr += dst_stride;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
dst_stride <<= interlaced_shift;
|
||||
height >>= interlaced_shift;
|
||||
|
||||
const u32 end_x = src_x + width;
|
||||
for (u32 row = 0; row < height; row++)
|
||||
{
|
||||
const u16* src_row_ptr = &m_vram[(src_y % VRAM_HEIGHT) * VRAM_WIDTH];
|
||||
u32* dst_row_ptr = dst_ptr;
|
||||
|
||||
for (u32 col = 0; col < width; col++)
|
||||
{
|
||||
const u32 offset = (src_x + ((col * 3) / 2));
|
||||
const u16 s0 = src_row_ptr[offset % VRAM_WIDTH];
|
||||
const u16 s1 = src_row_ptr[(offset + 1) % VRAM_WIDTH];
|
||||
const u8 shift = static_cast<u8>(col & 1u) * 8;
|
||||
*(dst_row_ptr++) = (((ZeroExtend32(s1) << 16) | ZeroExtend32(s0)) >> shift) | 0xFF000000u;
|
||||
}
|
||||
|
||||
src_y += (1 << interlaced_shift);
|
||||
dst_ptr += dst_stride;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -98,34 +159,32 @@ void GPU_SW::UpdateDisplay()
|
|||
const u32 field = GetInterlacedField();
|
||||
if (m_GPUSTAT.display_area_color_depth_24)
|
||||
{
|
||||
CopyOut24Bit(m_vram.data() + (vram_offset_y + field) * VRAM_WIDTH + m_crtc_state.regs.X, VRAM_WIDTH * 2,
|
||||
m_display_texture_buffer.data() + field * display_width, display_width * 2, display_width,
|
||||
display_height / 2);
|
||||
CopyOut24Bit(m_crtc_state.regs.X, vram_offset_y + field, m_display_texture_buffer.data() + field * VRAM_WIDTH,
|
||||
VRAM_WIDTH, display_width, display_height, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
CopyOut15Bit(m_vram.data() + (vram_offset_y + field) * VRAM_WIDTH + m_crtc_state.regs.X, VRAM_WIDTH * 2,
|
||||
m_display_texture_buffer.data() + field * display_width, display_width * 2, display_width,
|
||||
display_height / 2);
|
||||
CopyOut15Bit(m_crtc_state.regs.X, vram_offset_y + field, m_display_texture_buffer.data() + field * VRAM_WIDTH,
|
||||
VRAM_WIDTH, display_width, display_height, true);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_GPUSTAT.display_area_color_depth_24)
|
||||
{
|
||||
CopyOut24Bit(m_vram.data() + vram_offset_y * VRAM_WIDTH + m_crtc_state.regs.X, VRAM_WIDTH,
|
||||
m_display_texture_buffer.data(), display_width, display_width, display_height);
|
||||
CopyOut24Bit(m_crtc_state.regs.X, vram_offset_y, m_display_texture_buffer.data(), VRAM_WIDTH, display_width,
|
||||
display_height, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
CopyOut15Bit(m_vram.data() + vram_offset_y * VRAM_WIDTH + m_crtc_state.regs.X, VRAM_WIDTH,
|
||||
m_display_texture_buffer.data(), display_width, display_width, display_height);
|
||||
CopyOut15Bit(m_crtc_state.regs.X, vram_offset_y, m_display_texture_buffer.data(), VRAM_WIDTH, display_width,
|
||||
display_height, false);
|
||||
}
|
||||
}
|
||||
|
||||
const u32 texture_offset_x = m_crtc_state.display_vram_left - m_crtc_state.regs.X;
|
||||
m_host_display->UpdateTexture(m_display_texture.get(), texture_offset_x, 0, display_width, display_height,
|
||||
m_display_texture_buffer.data(), display_width * sizeof(u32));
|
||||
m_display_texture_buffer.data(), VRAM_WIDTH * sizeof(u32));
|
||||
m_host_display->SetDisplayTexture(m_display_texture->GetHandle(), VRAM_WIDTH, VRAM_HEIGHT, texture_offset_x, 0,
|
||||
display_width, display_height);
|
||||
m_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height,
|
||||
|
@ -135,7 +194,7 @@ void GPU_SW::UpdateDisplay()
|
|||
}
|
||||
else
|
||||
{
|
||||
CopyOut15Bit(m_vram.data(), VRAM_WIDTH, m_display_texture_buffer.data(), VRAM_WIDTH, VRAM_WIDTH, VRAM_HEIGHT);
|
||||
CopyOut15Bit(0, 0, m_display_texture_buffer.data(), VRAM_WIDTH, VRAM_WIDTH, VRAM_HEIGHT, false);
|
||||
m_host_display->UpdateTexture(m_display_texture.get(), 0, 0, VRAM_WIDTH, VRAM_HEIGHT,
|
||||
m_display_texture_buffer.data(), VRAM_WIDTH * sizeof(u32));
|
||||
m_host_display->SetDisplayTexture(m_display_texture->GetHandle(), VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH,
|
||||
|
|
|
@ -43,10 +43,8 @@ protected:
|
|||
//////////////////////////////////////////////////////////////////////////
|
||||
// Scanout
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
static void CopyOut15Bit(const u16* src_ptr, u32 src_stride, u32* dst_ptr, u32 dst_stride, u32 width, u32 height);
|
||||
|
||||
static void CopyOut24Bit(const u16* src_ptr, u32 src_stride, u32* dst_ptr, u32 dst_stride, u32 width, u32 height);
|
||||
|
||||
void CopyOut15Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u32 width, u32 height, bool interlaced);
|
||||
void CopyOut24Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u32 width, u32 height, bool interlaced);
|
||||
void UpdateDisplay() override;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
|
Loading…
Reference in New Issue