GS: Replace magic alignment number with constant

And ensure it gets used in None preloading.
This commit is contained in:
Stenzek 2023-04-16 14:52:42 +10:00 committed by refractionpcsx2
parent d745564451
commit ef9f0cf635
10 changed files with 45 additions and 29 deletions

View File

@ -15,6 +15,7 @@
#include "PrecompiledHeader.h" #include "PrecompiledHeader.h"
#include "GS/GSClut.h" #include "GS/GSClut.h"
#include "GS/GSExtra.h"
#include "GS/GSLocalMemory.h" #include "GS/GSLocalMemory.h"
#include "GS/GSGL.h" #include "GS/GSGL.h"
#include "GS/Renderers/Common/GSDevice.h" #include "GS/Renderers/Common/GSDevice.h"
@ -27,7 +28,7 @@ GSClut::GSClut(GSLocalMemory* mem)
static constexpr u32 CLUT_ALLOC_SIZE = 4096 * 2; static constexpr u32 CLUT_ALLOC_SIZE = 4096 * 2;
// 1k + 1k for mirrored area simulating wrapping memory // 1k + 1k for mirrored area simulating wrapping memory
m_clut = static_cast<u16*>(_aligned_malloc(CLUT_ALLOC_SIZE, 32)); m_clut = static_cast<u16*>(_aligned_malloc(CLUT_ALLOC_SIZE, VECTOR_ALIGNMENT));
if (!m_clut) if (!m_clut)
throw std::bad_alloc(); throw std::bad_alloc();

View File

@ -15,8 +15,9 @@
#pragma once #pragma once
#include "GSVector.h" #include "GS/GSVector.h"
#include "pcsx2/Config.h" #include "pcsx2/Config.h"
#include "common/Align.h"
/// Like `memcmp(&a, &b, sizeof(T)) == 0` but faster /// Like `memcmp(&a, &b, sizeof(T)) == 0` but faster
template <typename T> template <typename T>
@ -117,6 +118,17 @@ static constexpr u32 MAX_SKIPPED_DUPLICATE_FRAMES = 3;
extern void* GSAllocateWrappedMemory(size_t size, size_t repeat); extern void* GSAllocateWrappedMemory(size_t size, size_t repeat);
extern void GSFreeWrappedMemory(void* ptr, size_t size, size_t repeat); extern void GSFreeWrappedMemory(void* ptr, size_t size, size_t repeat);
/// We want all allocations and pitches to be aligned to 32 bytes, regardless of whether we're
/// SSE4 or AVX2, because of multi-ISA.
static constexpr u32 VECTOR_ALIGNMENT = 32;
/// Rounds an allocation size or row pitch up to VECTOR_ALIGNMENT by
/// forwarding to Common::AlignUpPow2, so every caller shares one constant.
template<typename T>
__fi static inline T VectorAlign(T value)
{
	const T aligned_value = Common::AlignUpPow2(value, VECTOR_ALIGNMENT);
	return aligned_value;
}
// clang-format off // clang-format off
#ifdef __POSIX__ #ifdef __POSIX__

View File

@ -286,7 +286,7 @@ GSPixelOffset* GSLocalMemory::GetPixelOffset(const GIFRegFRAME& FRAME, const GIF
return it->second; return it->second;
} }
GSPixelOffset* off = (GSPixelOffset*)_aligned_malloc(sizeof(GSPixelOffset), 32); GSPixelOffset* off = (GSPixelOffset*)_aligned_malloc(sizeof(GSPixelOffset), VECTOR_ALIGNMENT);
off->hash = hash; off->hash = hash;
off->fbp = fbp; off->fbp = fbp;
@ -339,7 +339,7 @@ GSPixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& FRAME, const G
return it->second; return it->second;
} }
GSPixelOffset4* off = (GSPixelOffset4*)_aligned_malloc(sizeof(GSPixelOffset4), 32); GSPixelOffset4* off = (GSPixelOffset4*)_aligned_malloc(sizeof(GSPixelOffset4), VECTOR_ALIGNMENT);
off->hash = hash; off->hash = hash;
off->fbp = fbp; off->fbp = fbp;
@ -544,7 +544,7 @@ void GSLocalMemory::SaveBMP(const std::string& fn, u32 bp, u32 bw, u32 psm, int
{ {
int pitch = w * 4; int pitch = w * 4;
int size = pitch * h; int size = pitch * h;
void* bits = _aligned_malloc(size, 32); void* bits = _aligned_malloc(size, VECTOR_ALIGNMENT);
GIFRegTEX0 TEX0; GIFRegTEX0 TEX0;

View File

@ -19,8 +19,9 @@
#include "common/FileSystem.h" #include "common/FileSystem.h"
#include "common/StringUtil.h" #include "common/StringUtil.h"
#include "GSDump.h" #include "GS/GSDump.h"
#include "GSLzma.h" #include "GS/GSLzma.h"
#include "GS/GSExtra.h"
using namespace GSDumpTypes; using namespace GSDumpTypes;
@ -273,8 +274,8 @@ void GSDumpLzma::Initialize()
} }
m_buff_size = 1024*1024; m_buff_size = 1024*1024;
m_area = (uint8_t*)_aligned_malloc(m_buff_size, 32); m_area = (uint8_t*)_aligned_malloc(m_buff_size, VECTOR_ALIGNMENT);
m_inbuf = (uint8_t*)_aligned_malloc(BUFSIZ, 32); m_inbuf = (uint8_t*)_aligned_malloc(BUFSIZ, VECTOR_ALIGNMENT);
m_avail = 0; m_avail = 0;
m_start = 0; m_start = 0;

View File

@ -40,7 +40,8 @@ GSTextureCache::GSTextureCache()
// In theory 4MB is enough but 9MB is safer for overflow (8MB // In theory 4MB is enough but 9MB is safer for overflow (8MB
// isn't enough in custom resolution) // isn't enough in custom resolution)
// Test: onimusha 3 PAL 60Hz // Test: onimusha 3 PAL 60Hz
s_unswizzle_buffer = (u8*)_aligned_malloc(9 * 1024 * 1024, 32); s_unswizzle_buffer = (u8*)_aligned_malloc(9 * 1024 * 1024, VECTOR_ALIGNMENT);
pxAssertRel(s_unswizzle_buffer, "Failed to allocate unswizzle buffer");
m_surface_offset_cache.reserve(S_SURFACE_OFFSET_CACHE_MAX_SIZE); m_surface_offset_cache.reserve(S_SURFACE_OFFSET_CACHE_MAX_SIZE);
} }
@ -4154,7 +4155,7 @@ void GSTextureCache::Source::UpdateLayer(const GIFRegTEX0& TEX0, const GSVector4
void GSTextureCache::Source::Write(const GSVector4i& r, int layer, const GSOffset& off) void GSTextureCache::Source::Write(const GSVector4i& r, int layer, const GSOffset& off)
{ {
if (!m_write.rect) if (!m_write.rect)
m_write.rect = static_cast<GSVector4i*>(_aligned_malloc(3 * sizeof(GSVector4i), 32)); m_write.rect = static_cast<GSVector4i*>(_aligned_malloc(3 * sizeof(GSVector4i), 16));
m_write.rect[m_write.count++] = r; m_write.rect[m_write.count++] = r;
@ -4214,6 +4215,8 @@ void GSTextureCache::Source::Flush(u32 count, int layer, const GSOffset& off)
rtx = psm.rtxP; rtx = psm.rtxP;
} }
pitch = VectorAlign(pitch);
for (u32 i = 0; i < count; i++) for (u32 i = 0; i < count; i++)
{ {
const GSVector4i r(m_write.rect[i]); const GSVector4i r(m_write.rect[i]);
@ -4385,7 +4388,7 @@ void GSTextureCache::Target::Update(bool reset_age)
} }
else else
{ {
const int pitch = Common::AlignUpPow2(r.width() * sizeof(u32), 32); const int pitch = VectorAlign(r.width() * sizeof(u32));
g_gs_renderer->m_mem.ReadTexture(off, r, s_unswizzle_buffer, pitch, TEXA); g_gs_renderer->m_mem.ReadTexture(off, r, s_unswizzle_buffer, pitch, TEXA);
t->Update(t_r, s_unswizzle_buffer, pitch); t->Update(t_r, s_unswizzle_buffer, pitch);
@ -5241,7 +5244,7 @@ static void HashTextureLevel(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, GST
{ {
// Expand texture indices. Align to 32 bytes for AVX2. // Expand texture indices. Align to 32 bytes for AVX2.
const bool palette = (psm.pal > 0); const bool palette = (psm.pal > 0);
const u32 pitch = Common::AlignUpPow2(static_cast<u32>(block_rect.z) << (palette ? 0 : 2), 32); const u32 pitch = VectorAlign(static_cast<u32>(block_rect.z) << (palette ? 0 : 2));
const u32 row_size = static_cast<u32>(tw) << (palette ? 0 : 2); const u32 row_size = static_cast<u32>(tw) << (palette ? 0 : 2);
const GSLocalMemory::readTexture rtx = palette ? psm.rtxP : psm.rtx; const GSLocalMemory::readTexture rtx = palette ? psm.rtxP : psm.rtx;
@ -5317,8 +5320,7 @@ void GSTextureCache::PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
} }
else else
{ {
// Align pitch to 32 bytes for AVX2 if we're going through the temp buffer path. pitch = VectorAlign(pitch);
pitch = Common::AlignUpPow2(pitch, 32);
u8* buff = s_unswizzle_buffer; u8* buff = s_unswizzle_buffer;
rtx(mem, off, block_rect, buff, pitch, TEXA); rtx(mem, off, block_rect, buff, pitch, TEXA);

View File

@ -18,6 +18,7 @@
#include "GS/Renderers/OpenGL/GSDeviceOGL.h" #include "GS/Renderers/OpenGL/GSDeviceOGL.h"
#include "GS/Renderers/OpenGL/GSTextureOGL.h" #include "GS/Renderers/OpenGL/GSTextureOGL.h"
#include "GS/Renderers/OpenGL/GLState.h" #include "GS/Renderers/OpenGL/GLState.h"
#include "GS/GSExtra.h"
#include "GS/GSPerfMon.h" #include "GS/GSPerfMon.h"
#include "GS/GSPng.h" #include "GS/GSPng.h"
#include "GS/GSGL.h" #include "GS/GSGL.h"
@ -476,7 +477,7 @@ std::unique_ptr<GSDownloadTextureOGL> GSDownloadTextureOGL::Create(u32 width, u3
} }
// Fallback to glReadPixels() + CPU buffer. // Fallback to glReadPixels() + CPU buffer.
u8* cpu_buffer = static_cast<u8*>(_aligned_malloc(buffer_size, 32)); u8* cpu_buffer = static_cast<u8*>(_aligned_malloc(buffer_size, VECTOR_ALIGNMENT));
if (!cpu_buffer) if (!cpu_buffer)
return {}; return {};

View File

@ -55,7 +55,7 @@ GSRasterizer::GSRasterizer(GSDrawScanline* ds, int id, int threads)
m_thread_height = compute_best_thread_height(threads); m_thread_height = compute_best_thread_height(threads);
m_edge.buff = static_cast<GSVertexSW*>(_aligned_malloc(sizeof(GSVertexSW) * 2048, 32)); m_edge.buff = static_cast<GSVertexSW*>(_aligned_malloc(sizeof(GSVertexSW) * 2048, VECTOR_ALIGNMENT));
m_edge.count = 0; m_edge.count = 0;
if (!m_edge.buff) if (!m_edge.buff)
throw std::bad_alloc(); throw std::bad_alloc();

View File

@ -39,7 +39,7 @@ GSRendererSW::GSRendererSW(int threads)
m_tc = std::make_unique<GSTextureCacheSW>(); m_tc = std::make_unique<GSTextureCacheSW>();
m_rl = GSRasterizerList::Create(threads); m_rl = GSRasterizerList::Create(threads);
m_output = (u8*)_aligned_malloc(1024 * 1024 * sizeof(u32), 32); m_output = (u8*)_aligned_malloc(1024 * 1024 * sizeof(u32), VECTOR_ALIGNMENT);
std::fill(std::begin(m_fzb_pages), std::end(m_fzb_pages), 0); std::fill(std::begin(m_fzb_pages), std::end(m_fzb_pages), 0);
std::fill(std::begin(m_tex_pages), std::end(m_tex_pages), 0); std::fill(std::begin(m_tex_pages), std::end(m_tex_pages), 0);
@ -1046,7 +1046,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
{ {
gd.sel.tlu = 1; gd.sel.tlu = 1;
gd.clut = (u32*)m_vertex_heap.alloc(sizeof(u32) * 256, 32); // FIXME: might address uninitialized data of the texture (0xCD) that is not in 0-15 range for 4-bpp formats gd.clut = (u32*)m_vertex_heap.alloc(sizeof(u32) * 256, VECTOR_ALIGNMENT); // FIXME: might address uninitialized data of the texture (0xCD) that is not in 0-15 range for 4-bpp formats
memcpy(gd.clut, (const u32*)m_mem.m_clut, sizeof(u32) * GSLocalMemory::m_psm[context->TEX0.PSM].pal); memcpy(gd.clut, (const u32*)m_mem.m_clut, sizeof(u32) * GSLocalMemory::m_psm[context->TEX0.PSM].pal);
} }
@ -1333,7 +1333,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
ExpandDIMX(m_dimx, env.DIMX); ExpandDIMX(m_dimx, env.DIMX);
} }
gd.dimx = (GSVector4i*)m_vertex_heap.alloc(sizeof(m_dimx), 32); gd.dimx = (GSVector4i*)m_vertex_heap.alloc(sizeof(m_dimx), VECTOR_ALIGNMENT);
std::memcpy(gd.dimx, m_dimx, sizeof(m_dimx)); std::memcpy(gd.dimx, m_dimx, sizeof(m_dimx));
} }

View File

@ -14,7 +14,8 @@
*/ */
#include "PrecompiledHeader.h" #include "PrecompiledHeader.h"
#include "GSTextureCacheSW.h" #include "GS/Renderers/SW/GSTextureCacheSW.h"
#include "GS/GSExtra.h"
GSTextureCacheSW::GSTextureCacheSW() = default; GSTextureCacheSW::GSTextureCacheSW() = default;
@ -233,14 +234,11 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
if (m_buff == NULL) if (m_buff == NULL)
{ {
u32 pitch = (1 << m_tw) << shift; const u32 pitch = (1 << m_tw) << shift;
m_buff = _aligned_malloc(pitch * th * 4, 32); m_buff = _aligned_malloc(pitch * th * 4, VECTOR_ALIGNMENT);
if (!m_buff)
if (m_buff == NULL)
{
return false; return false;
}
} }
GSLocalMemory& mem = g_gs_renderer->m_mem; GSLocalMemory& mem = g_gs_renderer->m_mem;

View File

@ -14,7 +14,8 @@
*/ */
#include "PrecompiledHeader.h" #include "PrecompiledHeader.h"
#include "GSTextureSW.h" #include "GS/Renderers/SW/GSTextureSW.h"
#include "GS/GSExtra.h"
#include "GS/GSPng.h" #include "GS/GSPng.h"
GSTextureSW::GSTextureSW(Type type, int width, int height) GSTextureSW::GSTextureSW(Type type, int width, int height)
@ -24,7 +25,7 @@ GSTextureSW::GSTextureSW(Type type, int width, int height)
m_type = type; m_type = type;
m_format = Format::Invalid; m_format = Format::Invalid;
m_pitch = ((width << 2) + 31) & ~31; m_pitch = ((width << 2) + 31) & ~31;
m_data = _aligned_malloc(m_pitch * height, 32); m_data = _aligned_malloc(m_pitch * height, VECTOR_ALIGNMENT);
} }
GSTextureSW::~GSTextureSW() GSTextureSW::~GSTextureSW()