GS/OGL: Align texture uploads to 64 bytes

Fixes potential crash in some games with odd-sized targets
and preloading (e.g. Densha De Go).
This commit is contained in:
Stenzek 2023-02-09 15:11:53 +10:00 committed by lightningterror
parent 52f034a513
commit 0a292715cf
1 changed files with 24 additions and 11 deletions

View File

@ -21,10 +21,17 @@
#include "GS/GSPerfMon.h"
#include "GS/GSPng.h"
#include "GS/GSGL.h"
#include "common/Align.h"
#include "common/AlignedMalloc.h"
#include "common/StringUtil.h"
static constexpr u32 TEXTURE_UPLOAD_ALIGNMENT = 256;
// Looking across a range of GPUs, the optimal copy alignment for Vulkan drivers seems
// to be between 1 (AMD/NV) and 64 (Intel). So, we'll go with 64 here.
static constexpr u32 TEXTURE_UPLOAD_ALIGNMENT = 64;
// The pitch alignment must be less or equal to the upload alignment.
// We need 32 here for AVX2, so 64 is also fine.
static constexpr u32 TEXTURE_UPLOAD_PITCH_ALIGNMENT = 64;
GSTextureOGL::GSTextureOGL(Type type, int width, int height, int levels, Format format)
{
@ -214,8 +221,8 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch, int
m_clean = false;
u32 row_byte = r.width() << m_int_shift;
u32 map_size = r.height() * row_byte;
const u32 preferred_pitch = Common::AlignUpPow2(r.width() << m_int_shift, TEXTURE_UPLOAD_PITCH_ALIGNMENT);
const u32 map_size = r.height() * preferred_pitch;
#if 0
if (r.height() == 1) {
@ -250,7 +257,7 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch, int
GL::StreamBuffer* const sb = GSDeviceOGL::GetTextureUploadBuffer();
const auto map = sb->Map(TEXTURE_UPLOAD_ALIGNMENT, map_size);
StringUtil::StrideMemCpy(map.pointer, row_byte, data, pitch, row_byte, r.height());
StringUtil::StrideMemCpy(map.pointer, preferred_pitch, data, pitch, r.width() << m_int_shift, r.height());
sb->Unmap(map_size);
sb->Bind();
@ -275,13 +282,13 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer)
ASSERT(r.width() != 0);
ASSERT(r.height() != 0);
u32 row_byte = r.width() << m_int_shift;
m.pitch = row_byte;
const u32 pitch = Common::AlignUpPow2(r.width() << m_int_shift, TEXTURE_UPLOAD_PITCH_ALIGNMENT);
m.pitch = pitch;
if (m_type == Type::Texture || m_type == Type::RenderTarget)
{
const u32 map_size = r.height() * row_byte;
if (GLLoader::buggy_pbo || map_size > GSDeviceOGL::GetTextureUploadBuffer()->GetChunkSize())
const u32 upload_size = CalcUploadSize(r.height(), pitch);
if (GLLoader::buggy_pbo || upload_size > GSDeviceOGL::GetTextureUploadBuffer()->GetChunkSize())
return false;
GL_PUSH_("Upload Texture %d", m_texture_id); // POP is in Unmap
@ -289,7 +296,7 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer)
m_clean = false;
const auto map = GSDeviceOGL::GetTextureUploadBuffer()->Map(TEXTURE_UPLOAD_ALIGNMENT, map_size);
const auto map = GSDeviceOGL::GetTextureUploadBuffer()->Map(TEXTURE_UPLOAD_ALIGNMENT, upload_size);
m.bits = static_cast<u8*>(map.pointer);
// Save the area for the unmap
@ -310,14 +317,20 @@ void GSTextureOGL::Unmap()
{
if (m_type == Type::Texture || m_type == Type::RenderTarget)
{
const u32 map_size = (m_r_w << m_int_shift) * m_r_h;
const u32 pitch = Common::AlignUpPow2(m_r_w << m_int_shift, TEXTURE_UPLOAD_PITCH_ALIGNMENT);
const u32 upload_size = pitch * m_r_h;
GL::StreamBuffer* sb = GSDeviceOGL::GetTextureUploadBuffer();
sb->Unmap(map_size);
sb->Unmap(upload_size);
sb->Bind();
const u32 row_length = CalcUploadRowLengthFromPitch(pitch);
glPixelStorei(GL_UNPACK_ROW_LENGTH, row_length);
glTextureSubImage2D(m_texture_id, m_layer, m_r_x, m_r_y, m_r_w, m_r_h, m_int_format, m_int_type,
reinterpret_cast<void*>(static_cast<uintptr_t>(m_map_offset)));
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
sb->Unbind();
m_needs_mipmaps_generated = true;