mirror of https://github.com/PCSX2/pcsx2.git
GS: Replace magic alignment number with constant
And ensure it gets used in None preloading.
This commit is contained in:
parent
d745564451
commit
ef9f0cf635
|
@ -15,6 +15,7 @@
|
||||||
|
|
||||||
#include "PrecompiledHeader.h"
|
#include "PrecompiledHeader.h"
|
||||||
#include "GS/GSClut.h"
|
#include "GS/GSClut.h"
|
||||||
|
#include "GS/GSExtra.h"
|
||||||
#include "GS/GSLocalMemory.h"
|
#include "GS/GSLocalMemory.h"
|
||||||
#include "GS/GSGL.h"
|
#include "GS/GSGL.h"
|
||||||
#include "GS/Renderers/Common/GSDevice.h"
|
#include "GS/Renderers/Common/GSDevice.h"
|
||||||
|
@ -27,7 +28,7 @@ GSClut::GSClut(GSLocalMemory* mem)
|
||||||
static constexpr u32 CLUT_ALLOC_SIZE = 4096 * 2;
|
static constexpr u32 CLUT_ALLOC_SIZE = 4096 * 2;
|
||||||
|
|
||||||
// 1k + 1k for mirrored area simulating wrapping memory
|
// 1k + 1k for mirrored area simulating wrapping memory
|
||||||
m_clut = static_cast<u16*>(_aligned_malloc(CLUT_ALLOC_SIZE, 32));
|
m_clut = static_cast<u16*>(_aligned_malloc(CLUT_ALLOC_SIZE, VECTOR_ALIGNMENT));
|
||||||
if (!m_clut)
|
if (!m_clut)
|
||||||
throw std::bad_alloc();
|
throw std::bad_alloc();
|
||||||
|
|
||||||
|
|
|
@ -15,8 +15,9 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "GSVector.h"
|
#include "GS/GSVector.h"
|
||||||
#include "pcsx2/Config.h"
|
#include "pcsx2/Config.h"
|
||||||
|
#include "common/Align.h"
|
||||||
|
|
||||||
/// Like `memcmp(&a, &b, sizeof(T)) == 0` but faster
|
/// Like `memcmp(&a, &b, sizeof(T)) == 0` but faster
|
||||||
template <typename T>
|
template <typename T>
|
||||||
|
@ -117,6 +118,17 @@ static constexpr u32 MAX_SKIPPED_DUPLICATE_FRAMES = 3;
|
||||||
extern void* GSAllocateWrappedMemory(size_t size, size_t repeat);
|
extern void* GSAllocateWrappedMemory(size_t size, size_t repeat);
|
||||||
extern void GSFreeWrappedMemory(void* ptr, size_t size, size_t repeat);
|
extern void GSFreeWrappedMemory(void* ptr, size_t size, size_t repeat);
|
||||||
|
|
||||||
|
/// We want all allocations and pitches to be aligned to 32 bytes, regardless of whether we're
|
||||||
|
/// SSE4 or AVX2, because of multi-ISA.
|
||||||
|
static constexpr u32 VECTOR_ALIGNMENT = 32;
|
||||||
|
|
||||||
|
/// Aligns allocation/pitch size to preferred host size.
/// Forwards `value` together with VECTOR_ALIGNMENT to Common::AlignUpPow2 and returns the
/// result converted to the caller's type T.
/// NOTE(review): AlignUpPow2 presumably rounds up to the next multiple of a power-of-two
/// alignment -- confirm against common/Align.h.
|
||||||
|
template<typename T>
|
||||||
|
__fi static inline T VectorAlign(T value)
|
||||||
|
{
|
||||||
|
return Common::AlignUpPow2(value, VECTOR_ALIGNMENT);
|
||||||
|
}
|
||||||
|
|
||||||
// clang-format off
|
// clang-format off
|
||||||
|
|
||||||
#ifdef __POSIX__
|
#ifdef __POSIX__
|
||||||
|
|
|
@ -286,7 +286,7 @@ GSPixelOffset* GSLocalMemory::GetPixelOffset(const GIFRegFRAME& FRAME, const GIF
|
||||||
return it->second;
|
return it->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
GSPixelOffset* off = (GSPixelOffset*)_aligned_malloc(sizeof(GSPixelOffset), 32);
|
GSPixelOffset* off = (GSPixelOffset*)_aligned_malloc(sizeof(GSPixelOffset), VECTOR_ALIGNMENT);
|
||||||
|
|
||||||
off->hash = hash;
|
off->hash = hash;
|
||||||
off->fbp = fbp;
|
off->fbp = fbp;
|
||||||
|
@ -339,7 +339,7 @@ GSPixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& FRAME, const G
|
||||||
return it->second;
|
return it->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
GSPixelOffset4* off = (GSPixelOffset4*)_aligned_malloc(sizeof(GSPixelOffset4), 32);
|
GSPixelOffset4* off = (GSPixelOffset4*)_aligned_malloc(sizeof(GSPixelOffset4), VECTOR_ALIGNMENT);
|
||||||
|
|
||||||
off->hash = hash;
|
off->hash = hash;
|
||||||
off->fbp = fbp;
|
off->fbp = fbp;
|
||||||
|
@ -544,7 +544,7 @@ void GSLocalMemory::SaveBMP(const std::string& fn, u32 bp, u32 bw, u32 psm, int
|
||||||
{
|
{
|
||||||
int pitch = w * 4;
|
int pitch = w * 4;
|
||||||
int size = pitch * h;
|
int size = pitch * h;
|
||||||
void* bits = _aligned_malloc(size, 32);
|
void* bits = _aligned_malloc(size, VECTOR_ALIGNMENT);
|
||||||
|
|
||||||
GIFRegTEX0 TEX0;
|
GIFRegTEX0 TEX0;
|
||||||
|
|
||||||
|
|
|
@ -19,8 +19,9 @@
|
||||||
#include "common/FileSystem.h"
|
#include "common/FileSystem.h"
|
||||||
#include "common/StringUtil.h"
|
#include "common/StringUtil.h"
|
||||||
|
|
||||||
#include "GSDump.h"
|
#include "GS/GSDump.h"
|
||||||
#include "GSLzma.h"
|
#include "GS/GSLzma.h"
|
||||||
|
#include "GS/GSExtra.h"
|
||||||
|
|
||||||
using namespace GSDumpTypes;
|
using namespace GSDumpTypes;
|
||||||
|
|
||||||
|
@ -273,8 +274,8 @@ void GSDumpLzma::Initialize()
|
||||||
}
|
}
|
||||||
|
|
||||||
m_buff_size = 1024*1024;
|
m_buff_size = 1024*1024;
|
||||||
m_area = (uint8_t*)_aligned_malloc(m_buff_size, 32);
|
m_area = (uint8_t*)_aligned_malloc(m_buff_size, VECTOR_ALIGNMENT);
|
||||||
m_inbuf = (uint8_t*)_aligned_malloc(BUFSIZ, 32);
|
m_inbuf = (uint8_t*)_aligned_malloc(BUFSIZ, VECTOR_ALIGNMENT);
|
||||||
m_avail = 0;
|
m_avail = 0;
|
||||||
m_start = 0;
|
m_start = 0;
|
||||||
|
|
||||||
|
|
|
@ -40,7 +40,8 @@ GSTextureCache::GSTextureCache()
|
||||||
// In theory 4MB is enough but 9MB is safer for overflow (8MB
|
// In theory 4MB is enough but 9MB is safer for overflow (8MB
|
||||||
// isn't enough in custom resolution)
|
// isn't enough in custom resolution)
|
||||||
// Test: onimusha 3 PAL 60Hz
|
// Test: onimusha 3 PAL 60Hz
|
||||||
s_unswizzle_buffer = (u8*)_aligned_malloc(9 * 1024 * 1024, 32);
|
s_unswizzle_buffer = (u8*)_aligned_malloc(9 * 1024 * 1024, VECTOR_ALIGNMENT);
|
||||||
|
pxAssertRel(s_unswizzle_buffer, "Failed to allocate unswizzle buffer");
|
||||||
|
|
||||||
m_surface_offset_cache.reserve(S_SURFACE_OFFSET_CACHE_MAX_SIZE);
|
m_surface_offset_cache.reserve(S_SURFACE_OFFSET_CACHE_MAX_SIZE);
|
||||||
}
|
}
|
||||||
|
@ -4154,7 +4155,7 @@ void GSTextureCache::Source::UpdateLayer(const GIFRegTEX0& TEX0, const GSVector4
|
||||||
void GSTextureCache::Source::Write(const GSVector4i& r, int layer, const GSOffset& off)
|
void GSTextureCache::Source::Write(const GSVector4i& r, int layer, const GSOffset& off)
|
||||||
{
|
{
|
||||||
if (!m_write.rect)
|
if (!m_write.rect)
|
||||||
m_write.rect = static_cast<GSVector4i*>(_aligned_malloc(3 * sizeof(GSVector4i), 32));
|
m_write.rect = static_cast<GSVector4i*>(_aligned_malloc(3 * sizeof(GSVector4i), 16));
|
||||||
|
|
||||||
m_write.rect[m_write.count++] = r;
|
m_write.rect[m_write.count++] = r;
|
||||||
|
|
||||||
|
@ -4214,6 +4215,8 @@ void GSTextureCache::Source::Flush(u32 count, int layer, const GSOffset& off)
|
||||||
rtx = psm.rtxP;
|
rtx = psm.rtxP;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pitch = VectorAlign(pitch);
|
||||||
|
|
||||||
for (u32 i = 0; i < count; i++)
|
for (u32 i = 0; i < count; i++)
|
||||||
{
|
{
|
||||||
const GSVector4i r(m_write.rect[i]);
|
const GSVector4i r(m_write.rect[i]);
|
||||||
|
@ -4385,7 +4388,7 @@ void GSTextureCache::Target::Update(bool reset_age)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
const int pitch = Common::AlignUpPow2(r.width() * sizeof(u32), 32);
|
const int pitch = VectorAlign(r.width() * sizeof(u32));
|
||||||
g_gs_renderer->m_mem.ReadTexture(off, r, s_unswizzle_buffer, pitch, TEXA);
|
g_gs_renderer->m_mem.ReadTexture(off, r, s_unswizzle_buffer, pitch, TEXA);
|
||||||
|
|
||||||
t->Update(t_r, s_unswizzle_buffer, pitch);
|
t->Update(t_r, s_unswizzle_buffer, pitch);
|
||||||
|
@ -5241,7 +5244,7 @@ static void HashTextureLevel(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, GST
|
||||||
{
|
{
|
||||||
// Expand texture indices. Align to 32 bytes for AVX2.
|
// Expand texture indices. Align to 32 bytes for AVX2.
|
||||||
const bool palette = (psm.pal > 0);
|
const bool palette = (psm.pal > 0);
|
||||||
const u32 pitch = Common::AlignUpPow2(static_cast<u32>(block_rect.z) << (palette ? 0 : 2), 32);
|
const u32 pitch = VectorAlign(static_cast<u32>(block_rect.z) << (palette ? 0 : 2));
|
||||||
const u32 row_size = static_cast<u32>(tw) << (palette ? 0 : 2);
|
const u32 row_size = static_cast<u32>(tw) << (palette ? 0 : 2);
|
||||||
const GSLocalMemory::readTexture rtx = palette ? psm.rtxP : psm.rtx;
|
const GSLocalMemory::readTexture rtx = palette ? psm.rtxP : psm.rtx;
|
||||||
|
|
||||||
|
@ -5317,8 +5320,7 @@ void GSTextureCache::PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Align pitch to 32 bytes for AVX2 if we're going through the temp buffer path.
|
pitch = VectorAlign(pitch);
|
||||||
pitch = Common::AlignUpPow2(pitch, 32);
|
|
||||||
|
|
||||||
u8* buff = s_unswizzle_buffer;
|
u8* buff = s_unswizzle_buffer;
|
||||||
rtx(mem, off, block_rect, buff, pitch, TEXA);
|
rtx(mem, off, block_rect, buff, pitch, TEXA);
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
#include "GS/Renderers/OpenGL/GSDeviceOGL.h"
|
#include "GS/Renderers/OpenGL/GSDeviceOGL.h"
|
||||||
#include "GS/Renderers/OpenGL/GSTextureOGL.h"
|
#include "GS/Renderers/OpenGL/GSTextureOGL.h"
|
||||||
#include "GS/Renderers/OpenGL/GLState.h"
|
#include "GS/Renderers/OpenGL/GLState.h"
|
||||||
|
#include "GS/GSExtra.h"
|
||||||
#include "GS/GSPerfMon.h"
|
#include "GS/GSPerfMon.h"
|
||||||
#include "GS/GSPng.h"
|
#include "GS/GSPng.h"
|
||||||
#include "GS/GSGL.h"
|
#include "GS/GSGL.h"
|
||||||
|
@ -476,7 +477,7 @@ std::unique_ptr<GSDownloadTextureOGL> GSDownloadTextureOGL::Create(u32 width, u3
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fallback to glReadPixels() + CPU buffer.
|
// Fallback to glReadPixels() + CPU buffer.
|
||||||
u8* cpu_buffer = static_cast<u8*>(_aligned_malloc(buffer_size, 32));
|
u8* cpu_buffer = static_cast<u8*>(_aligned_malloc(buffer_size, VECTOR_ALIGNMENT));
|
||||||
if (!cpu_buffer)
|
if (!cpu_buffer)
|
||||||
return {};
|
return {};
|
||||||
|
|
||||||
|
|
|
@ -55,7 +55,7 @@ GSRasterizer::GSRasterizer(GSDrawScanline* ds, int id, int threads)
|
||||||
|
|
||||||
m_thread_height = compute_best_thread_height(threads);
|
m_thread_height = compute_best_thread_height(threads);
|
||||||
|
|
||||||
m_edge.buff = static_cast<GSVertexSW*>(_aligned_malloc(sizeof(GSVertexSW) * 2048, 32));
|
m_edge.buff = static_cast<GSVertexSW*>(_aligned_malloc(sizeof(GSVertexSW) * 2048, VECTOR_ALIGNMENT));
|
||||||
m_edge.count = 0;
|
m_edge.count = 0;
|
||||||
if (!m_edge.buff)
|
if (!m_edge.buff)
|
||||||
throw std::bad_alloc();
|
throw std::bad_alloc();
|
||||||
|
|
|
@ -39,7 +39,7 @@ GSRendererSW::GSRendererSW(int threads)
|
||||||
m_tc = std::make_unique<GSTextureCacheSW>();
|
m_tc = std::make_unique<GSTextureCacheSW>();
|
||||||
m_rl = GSRasterizerList::Create(threads);
|
m_rl = GSRasterizerList::Create(threads);
|
||||||
|
|
||||||
m_output = (u8*)_aligned_malloc(1024 * 1024 * sizeof(u32), 32);
|
m_output = (u8*)_aligned_malloc(1024 * 1024 * sizeof(u32), VECTOR_ALIGNMENT);
|
||||||
|
|
||||||
std::fill(std::begin(m_fzb_pages), std::end(m_fzb_pages), 0);
|
std::fill(std::begin(m_fzb_pages), std::end(m_fzb_pages), 0);
|
||||||
std::fill(std::begin(m_tex_pages), std::end(m_tex_pages), 0);
|
std::fill(std::begin(m_tex_pages), std::end(m_tex_pages), 0);
|
||||||
|
@ -1046,7 +1046,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
||||||
{
|
{
|
||||||
gd.sel.tlu = 1;
|
gd.sel.tlu = 1;
|
||||||
|
|
||||||
gd.clut = (u32*)m_vertex_heap.alloc(sizeof(u32) * 256, 32); // FIXME: might address uninitialized data of the texture (0xCD) that is not in 0-15 range for 4-bpp formats
|
gd.clut = (u32*)m_vertex_heap.alloc(sizeof(u32) * 256, VECTOR_ALIGNMENT); // FIXME: might address uninitialized data of the texture (0xCD) that is not in 0-15 range for 4-bpp formats
|
||||||
|
|
||||||
memcpy(gd.clut, (const u32*)m_mem.m_clut, sizeof(u32) * GSLocalMemory::m_psm[context->TEX0.PSM].pal);
|
memcpy(gd.clut, (const u32*)m_mem.m_clut, sizeof(u32) * GSLocalMemory::m_psm[context->TEX0.PSM].pal);
|
||||||
}
|
}
|
||||||
|
@ -1333,7 +1333,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
||||||
ExpandDIMX(m_dimx, env.DIMX);
|
ExpandDIMX(m_dimx, env.DIMX);
|
||||||
}
|
}
|
||||||
|
|
||||||
gd.dimx = (GSVector4i*)m_vertex_heap.alloc(sizeof(m_dimx), 32);
|
gd.dimx = (GSVector4i*)m_vertex_heap.alloc(sizeof(m_dimx), VECTOR_ALIGNMENT);
|
||||||
|
|
||||||
std::memcpy(gd.dimx, m_dimx, sizeof(m_dimx));
|
std::memcpy(gd.dimx, m_dimx, sizeof(m_dimx));
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,7 +14,8 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "PrecompiledHeader.h"
|
#include "PrecompiledHeader.h"
|
||||||
#include "GSTextureCacheSW.h"
|
#include "GS/Renderers/SW/GSTextureCacheSW.h"
|
||||||
|
#include "GS/GSExtra.h"
|
||||||
|
|
||||||
GSTextureCacheSW::GSTextureCacheSW() = default;
|
GSTextureCacheSW::GSTextureCacheSW() = default;
|
||||||
|
|
||||||
|
@ -233,14 +234,11 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
|
||||||
|
|
||||||
if (m_buff == NULL)
|
if (m_buff == NULL)
|
||||||
{
|
{
|
||||||
u32 pitch = (1 << m_tw) << shift;
|
const u32 pitch = (1 << m_tw) << shift;
|
||||||
|
|
||||||
m_buff = _aligned_malloc(pitch * th * 4, 32);
|
m_buff = _aligned_malloc(pitch * th * 4, VECTOR_ALIGNMENT);
|
||||||
|
if (!m_buff)
|
||||||
if (m_buff == NULL)
|
|
||||||
{
|
|
||||||
return false;
|
return false;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
GSLocalMemory& mem = g_gs_renderer->m_mem;
|
GSLocalMemory& mem = g_gs_renderer->m_mem;
|
||||||
|
|
|
@ -14,7 +14,8 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "PrecompiledHeader.h"
|
#include "PrecompiledHeader.h"
|
||||||
#include "GSTextureSW.h"
|
#include "GS/Renderers/SW/GSTextureSW.h"
|
||||||
|
#include "GS/GSExtra.h"
|
||||||
#include "GS/GSPng.h"
|
#include "GS/GSPng.h"
|
||||||
|
|
||||||
GSTextureSW::GSTextureSW(Type type, int width, int height)
|
GSTextureSW::GSTextureSW(Type type, int width, int height)
|
||||||
|
@ -24,7 +25,7 @@ GSTextureSW::GSTextureSW(Type type, int width, int height)
|
||||||
m_type = type;
|
m_type = type;
|
||||||
m_format = Format::Invalid;
|
m_format = Format::Invalid;
|
||||||
m_pitch = ((width << 2) + 31) & ~31;
|
m_pitch = ((width << 2) + 31) & ~31;
|
||||||
m_data = _aligned_malloc(m_pitch * height, 32);
|
m_data = _aligned_malloc(m_pitch * height, VECTOR_ALIGNMENT);
|
||||||
}
|
}
|
||||||
|
|
||||||
GSTextureSW::~GSTextureSW()
|
GSTextureSW::~GSTextureSW()
|
||||||
|
|
Loading…
Reference in New Issue