From 0a292715cf1b04d043718f1917bb46fd9b7636a5 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Thu, 9 Feb 2023 15:11:53 +1000 Subject: [PATCH] GS/OGL: Align texture uploads to 64 bytes Fixes potential crash in some games with odd-sized targets and preloading (e.g. Densha De Go). --- pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp | 35 +++++++++++++++------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp index d4130288db..945e11e83d 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp @@ -21,10 +21,17 @@ #include "GS/GSPerfMon.h" #include "GS/GSPng.h" #include "GS/GSGL.h" +#include "common/Align.h" #include "common/AlignedMalloc.h" #include "common/StringUtil.h" -static constexpr u32 TEXTURE_UPLOAD_ALIGNMENT = 256; +// Looking across a range of GPUs, the optimal copy alignment for Vulkan drivers seems +// to be between 1 (AMD/NV) and 64 (Intel). So, we'll go with 64 here. +static constexpr u32 TEXTURE_UPLOAD_ALIGNMENT = 64; + +// The pitch alignment must be less or equal to the upload alignment. +// We need 32 here for AVX2, so 64 is also fine. +static constexpr u32 TEXTURE_UPLOAD_PITCH_ALIGNMENT = 64; GSTextureOGL::GSTextureOGL(Type type, int width, int height, int levels, Format format) { @@ -214,8 +221,8 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch, int m_clean = false; - u32 row_byte = r.width() << m_int_shift; - u32 map_size = r.height() * row_byte; + const u32 preferred_pitch = Common::AlignUpPow2(r.width() << m_int_shift, TEXTURE_UPLOAD_PITCH_ALIGNMENT); + const u32 map_size = r.height() * preferred_pitch; #if 0 if (r.height() == 1) { @@ -250,7 +257,7 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch, int GL::StreamBuffer* const sb = GSDeviceOGL::GetTextureUploadBuffer(); const auto map = sb->Map(TEXTURE_UPLOAD_ALIGNMENT, map_size); - StringUtil::StrideMemCpy(map.pointer, row_byte, data, pitch, row_byte, r.height()); + StringUtil::StrideMemCpy(map.pointer, preferred_pitch, data, pitch, r.width() << m_int_shift, r.height()); sb->Unmap(map_size); sb->Bind(); @@ -275,13 +282,13 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer) ASSERT(r.width() != 0); ASSERT(r.height() != 0); - u32 row_byte = r.width() << m_int_shift; - m.pitch = row_byte; + const u32 pitch = Common::AlignUpPow2(r.width() << m_int_shift, TEXTURE_UPLOAD_PITCH_ALIGNMENT); + m.pitch = pitch; if (m_type == Type::Texture || m_type == Type::RenderTarget) { - const u32 map_size = r.height() * row_byte; - if (GLLoader::buggy_pbo || map_size > GSDeviceOGL::GetTextureUploadBuffer()->GetChunkSize()) + const u32 upload_size = CalcUploadSize(r.height(), pitch); + if (GLLoader::buggy_pbo || upload_size > GSDeviceOGL::GetTextureUploadBuffer()->GetChunkSize()) return false; GL_PUSH_("Upload Texture %d", m_texture_id); // POP is in Unmap @@ -289,7 +296,7 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer) m_clean = false; - const auto map = GSDeviceOGL::GetTextureUploadBuffer()->Map(TEXTURE_UPLOAD_ALIGNMENT, map_size); + const auto map = GSDeviceOGL::GetTextureUploadBuffer()->Map(TEXTURE_UPLOAD_ALIGNMENT, upload_size); m.bits = static_cast(map.pointer); // Save the area for the unmap @@ -310,14 +317,20 @@ void GSTextureOGL::Unmap() { if (m_type == Type::Texture || m_type == Type::RenderTarget) { - const u32 map_size = (m_r_w << m_int_shift) * m_r_h; + const u32 pitch = Common::AlignUpPow2(m_r_w << m_int_shift, TEXTURE_UPLOAD_PITCH_ALIGNMENT); + const u32 upload_size = pitch * m_r_h; GL::StreamBuffer* sb = GSDeviceOGL::GetTextureUploadBuffer(); - sb->Unmap(map_size); + sb->Unmap(upload_size); sb->Bind(); + const u32 row_length = CalcUploadRowLengthFromPitch(pitch); + glPixelStorei(GL_UNPACK_ROW_LENGTH, row_length); + glTextureSubImage2D(m_texture_id, m_layer, m_r_x, m_r_y, m_r_w, m_r_h, m_int_format, m_int_type, reinterpret_cast(static_cast(m_map_offset))); + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + sb->Unbind(); m_needs_mipmaps_generated = true;