GS/OGL: Align texture uploads to 64 bytes

Fixes potential crash in some games with odd-sized targets
and preloading (e.g. Densha De Go).
This commit is contained in:
Stenzek 2023-02-09 15:11:53 +10:00 committed by lightningterror
parent 52f034a513
commit 0a292715cf
1 changed file with 24 additions and 11 deletions

View File

@ -21,10 +21,17 @@
#include "GS/GSPerfMon.h" #include "GS/GSPerfMon.h"
#include "GS/GSPng.h" #include "GS/GSPng.h"
#include "GS/GSGL.h" #include "GS/GSGL.h"
#include "common/Align.h"
#include "common/AlignedMalloc.h" #include "common/AlignedMalloc.h"
#include "common/StringUtil.h" #include "common/StringUtil.h"
static constexpr u32 TEXTURE_UPLOAD_ALIGNMENT = 256; // Looking across a range of GPUs, the optimal copy alignment for Vulkan drivers seems
// to be between 1 (AMD/NV) and 64 (Intel). So, we'll go with 64 here.
static constexpr u32 TEXTURE_UPLOAD_ALIGNMENT = 64;
// The pitch alignment must be less than or equal to the upload alignment.
// We need 32 here for AVX2, so 64 is also fine.
static constexpr u32 TEXTURE_UPLOAD_PITCH_ALIGNMENT = 64;
GSTextureOGL::GSTextureOGL(Type type, int width, int height, int levels, Format format) GSTextureOGL::GSTextureOGL(Type type, int width, int height, int levels, Format format)
{ {
@ -214,8 +221,8 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch, int
m_clean = false; m_clean = false;
u32 row_byte = r.width() << m_int_shift; const u32 preferred_pitch = Common::AlignUpPow2(r.width() << m_int_shift, TEXTURE_UPLOAD_PITCH_ALIGNMENT);
u32 map_size = r.height() * row_byte; const u32 map_size = r.height() * preferred_pitch;
#if 0 #if 0
if (r.height() == 1) { if (r.height() == 1) {
@ -250,7 +257,7 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch, int
GL::StreamBuffer* const sb = GSDeviceOGL::GetTextureUploadBuffer(); GL::StreamBuffer* const sb = GSDeviceOGL::GetTextureUploadBuffer();
const auto map = sb->Map(TEXTURE_UPLOAD_ALIGNMENT, map_size); const auto map = sb->Map(TEXTURE_UPLOAD_ALIGNMENT, map_size);
StringUtil::StrideMemCpy(map.pointer, row_byte, data, pitch, row_byte, r.height()); StringUtil::StrideMemCpy(map.pointer, preferred_pitch, data, pitch, r.width() << m_int_shift, r.height());
sb->Unmap(map_size); sb->Unmap(map_size);
sb->Bind(); sb->Bind();
@ -275,13 +282,13 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer)
ASSERT(r.width() != 0); ASSERT(r.width() != 0);
ASSERT(r.height() != 0); ASSERT(r.height() != 0);
u32 row_byte = r.width() << m_int_shift; const u32 pitch = Common::AlignUpPow2(r.width() << m_int_shift, TEXTURE_UPLOAD_PITCH_ALIGNMENT);
m.pitch = row_byte; m.pitch = pitch;
if (m_type == Type::Texture || m_type == Type::RenderTarget) if (m_type == Type::Texture || m_type == Type::RenderTarget)
{ {
const u32 map_size = r.height() * row_byte; const u32 upload_size = CalcUploadSize(r.height(), pitch);
if (GLLoader::buggy_pbo || map_size > GSDeviceOGL::GetTextureUploadBuffer()->GetChunkSize()) if (GLLoader::buggy_pbo || upload_size > GSDeviceOGL::GetTextureUploadBuffer()->GetChunkSize())
return false; return false;
GL_PUSH_("Upload Texture %d", m_texture_id); // POP is in Unmap GL_PUSH_("Upload Texture %d", m_texture_id); // POP is in Unmap
@ -289,7 +296,7 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer)
m_clean = false; m_clean = false;
const auto map = GSDeviceOGL::GetTextureUploadBuffer()->Map(TEXTURE_UPLOAD_ALIGNMENT, map_size); const auto map = GSDeviceOGL::GetTextureUploadBuffer()->Map(TEXTURE_UPLOAD_ALIGNMENT, upload_size);
m.bits = static_cast<u8*>(map.pointer); m.bits = static_cast<u8*>(map.pointer);
// Save the area for the unmap // Save the area for the unmap
@ -310,14 +317,20 @@ void GSTextureOGL::Unmap()
{ {
if (m_type == Type::Texture || m_type == Type::RenderTarget) if (m_type == Type::Texture || m_type == Type::RenderTarget)
{ {
const u32 map_size = (m_r_w << m_int_shift) * m_r_h; const u32 pitch = Common::AlignUpPow2(m_r_w << m_int_shift, TEXTURE_UPLOAD_PITCH_ALIGNMENT);
const u32 upload_size = pitch * m_r_h;
GL::StreamBuffer* sb = GSDeviceOGL::GetTextureUploadBuffer(); GL::StreamBuffer* sb = GSDeviceOGL::GetTextureUploadBuffer();
sb->Unmap(map_size); sb->Unmap(upload_size);
sb->Bind(); sb->Bind();
const u32 row_length = CalcUploadRowLengthFromPitch(pitch);
glPixelStorei(GL_UNPACK_ROW_LENGTH, row_length);
glTextureSubImage2D(m_texture_id, m_layer, m_r_x, m_r_y, m_r_w, m_r_h, m_int_format, m_int_type, glTextureSubImage2D(m_texture_id, m_layer, m_r_x, m_r_y, m_r_w, m_r_h, m_int_format, m_int_type,
reinterpret_cast<void*>(static_cast<uintptr_t>(m_map_offset))); reinterpret_cast<void*>(static_cast<uintptr_t>(m_map_offset)));
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
sb->Unbind(); sb->Unbind();
m_needs_mipmaps_generated = true; m_needs_mipmaps_generated = true;