diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 3eaed5f06..3ff74b33a 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -698,7 +698,35 @@ void GPU::ReadVRAM(u32 x, u32 y, u32 width, u32 height) {} void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) {} -void GPU::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) {} +void GPU::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) +{ + // Fast path when the copy is not oversized. + if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT) + { + const u16* src_ptr = static_cast<const u16*>(data); + u16* dst_ptr = &m_vram_ptr[y * VRAM_WIDTH + x]; + for (u32 yoffs = 0; yoffs < height; yoffs++) + { + std::copy_n(src_ptr, width, dst_ptr); + src_ptr += width; + dst_ptr += VRAM_WIDTH; + } + } + else + { + // Slow path when we need to handle wrap-around. + const u16* src_ptr = static_cast<const u16*>(data); + for (u32 row = 0; row < height;) + { + u16* dst_row_ptr = &m_vram_ptr[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH]; + for (u32 col = 0; col < width;) + { + // TODO: Handle unaligned reads... 
+ dst_row_ptr[(x + col++) % VRAM_WIDTH] = *(src_ptr++); + } + } + } +} void GPU::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) {} diff --git a/src/core/gpu_commands.cpp b/src/core/gpu_commands.cpp index 83067fc08..eebcf1a8f 100644 --- a/src/core/gpu_commands.cpp +++ b/src/core/gpu_commands.cpp @@ -286,13 +286,6 @@ bool GPU::HandleCopyRectangleCPUToVRAMCommand(const u32*& command_ptr, u32 comma Log_DebugPrintf("Copy rectangle from CPU to VRAM offset=(%u,%u), size=(%u,%u)", dst_x, dst_y, copy_width, copy_height); - if ((dst_x + copy_width) > VRAM_WIDTH || (dst_y + copy_height) > VRAM_HEIGHT) - { - Log_ErrorPrintf("Out of bounds CPU->VRAM copy (%u,%u) @ (%u,%u)", copy_width, copy_height, dst_x, dst_y); - command_ptr += num_words; - return true; - } - if (m_system->GetSettings().debugging.dump_cpu_to_vram_copies) { DumpVRAMToFile(SmallString::FromFormat("cpu_to_vram_copy_%u.png", s_cpu_to_vram_dump_id++), copy_width, copy_height, @@ -321,12 +314,6 @@ bool GPU::HandleCopyRectangleVRAMToCPUCommand(const u32*& command_ptr, u32 comma Log_DebugPrintf("Copy rectangle from VRAM to CPU offset=(%u,%u), size=(%u,%u)", src_x, src_y, width, height); - if ((src_x + width) > VRAM_WIDTH || (src_y + height) > VRAM_HEIGHT) - { - Panic("Out of bounds VRAM copy"); - return true; - } - // all rendering should be done first... FlushRender(); diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp index e304addaf..4b48bed07 100644 --- a/src/core/gpu_hw_d3d11.cpp +++ b/src/core/gpu_hw_d3d11.cpp @@ -638,6 +638,16 @@ void GPU_HW_D3D11::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* d { GPU_HW::UpdateVRAM(x, y, width, height, data); + if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT) + { + // CPU round trip if oversized for now. 
+ Log_WarningPrintf("Oversized VRAM update (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); + GPU::UpdateVRAM(x, y, width, height, data); + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data()); + return; + } + const u32 num_pixels = width * height; const auto map_result = m_texture_stream_buffer.Map(m_context.Get(), sizeof(u16), num_pixels * sizeof(u16)); std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16)); diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index 1f442c7ab..e95600124 100644 --- a/src/core/gpu_hw_opengl.cpp +++ b/src/core/gpu_hw_opengl.cpp @@ -583,6 +583,16 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* { GPU_HW::UpdateVRAM(x, y, width, height, data); + if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT) + { + // CPU round trip if oversized for now. + Log_WarningPrintf("Oversized VRAM update (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); + GPU::UpdateVRAM(x, y, width, height, data); + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data()); + return; + } + const u32 num_pixels = width * height; if (num_pixels < m_max_texture_buffer_size) { diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index d2458d038..825b11dc5 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -50,17 +50,6 @@ void GPU_SW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) std::fill_n(GetPixelPtr(x, y + yoffs), width, color16); } -void GPU_SW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) -{ - const u16* src_ptr = static_cast<const u16*>(data); - for (u32 yoffs = 0; yoffs < height; yoffs++) - { - u16* dst_ptr = GetPixelPtr(x, y + yoffs); - std::copy_n(src_ptr, width, dst_ptr); - src_ptr += width; - } -} - void GPU_SW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) { for (u32 yoffs = 0; yoffs < height; yoffs++) diff 
--git a/src/core/gpu_sw.h b/src/core/gpu_sw.h index 8a93495c8..da684eb56 100644 --- a/src/core/gpu_sw.h +++ b/src/core/gpu_sw.h @@ -36,7 +36,6 @@ protected: void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; - void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; //////////////////////////////////////////////////////////////////////////