mirror of https://github.com/PCSX2/pcsx2.git
GregMiscellaneous: zzogl-pg: Switch to _aligned_malloc in GetMemoryTarget, take 2. (If at first you don't succeed...)
git-svn-id: http://pcsx2.googlecode.com/svn/branches/GregMiscellaneous@3809 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
baa78266f2
commit
1fe66e55e7
|
@ -2202,13 +2202,13 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info
|
||||||
|
|
||||||
memcpy_amd(targ->ptex->memptr, g_pbyGSMemory + 4 * GPU_TEXWIDTH * targ->realy, 4 * GPU_TEXWIDTH * targ->height);
|
memcpy_amd(targ->ptex->memptr, g_pbyGSMemory + 4 * GPU_TEXWIDTH * targ->realy, 4 * GPU_TEXWIDTH * targ->height);
|
||||||
|
|
||||||
vector<u8> texdata;
|
__aligned16 u8* ptexdata = NULL;
|
||||||
u8* ptexdata = NULL;
|
bool has_data = false;
|
||||||
|
|
||||||
if (PSMT_ISCLUT(tex0.psm))
|
if (PSMT_ISCLUT(tex0.psm))
|
||||||
{
|
{
|
||||||
texdata.resize(((tex0.cpsm <= 1) ? 4 : 2) * texW * texH);
|
ptexdata = (u8*)_aligned_malloc(((tex0.cpsm <= 1) ? 4 : 2) * texW * texH, 16);
|
||||||
ptexdata = &texdata[0];
|
has_data = true;
|
||||||
|
|
||||||
u8* psrc = (u8*)(g_pbyGSMemory + 4 * GPU_TEXWIDTH * targ->realy);
|
u8* psrc = (u8*)(g_pbyGSMemory + 4 * GPU_TEXWIDTH * targ->realy);
|
||||||
|
|
||||||
|
@ -2231,43 +2231,16 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info
|
||||||
{
|
{
|
||||||
if (tex0.psm == PSMT16Z || tex0.psm == PSMT16SZ)
|
if (tex0.psm == PSMT16Z || tex0.psm == PSMT16SZ)
|
||||||
{
|
{
|
||||||
texdata.resize(4 * texW * texH
|
ptexdata = (u8*)_aligned_malloc(4 * texW * texH, 16);
|
||||||
#if defined(ZEROGS_SSE2)
|
has_data = true;
|
||||||
+ 15 // reserve additional elements for alignment if SSE2 used.
|
|
||||||
// better do it now, so less resizing would be needed
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
|
|
||||||
ptexdata = &texdata[0];
|
|
||||||
|
|
||||||
// needs to be 8 bit, use xmm for unpacking
|
// needs to be 8 bit, use xmm for unpacking
|
||||||
u16* dst = (u16*)ptexdata;
|
u16* dst = (u16*)ptexdata;
|
||||||
u16* src = (u16*)(g_pbyGSMemory + 4 * GPU_TEXWIDTH * targ->realy);
|
u16* src = (u16*)(g_pbyGSMemory + 4 * GPU_TEXWIDTH * targ->realy);
|
||||||
|
|
||||||
#if defined(ZEROGS_SSE2)
|
#if defined(ZEROGS_SSE2)
|
||||||
if (((u32)(uptr)dst) % 16 != 0)
|
assert(((u32)(uptr)dst) % 16 == 0);
|
||||||
{
|
SSE2_UnswizzleZ16Target(dst, src, targ->height * GPU_TEXWIDTH / 16);
|
||||||
// This is not unusual situation, when vector<u8> does not 16bit alignment, that is destructive for SSE2
|
|
||||||
// instruction movdqa [%eax], xmm0
|
|
||||||
// The idea would be resize vector to 15 elements, that set ptxedata to aligned position.
|
|
||||||
// Later we would move eax by 16, so only we should verify is first element align
|
|
||||||
|
|
||||||
// FIXME. As I see, texdata used only once here, it does not have any impact on other code.
|
|
||||||
// Probably, usage of _aligned_maloc() would be preferable.
|
|
||||||
|
|
||||||
// Update: Apparently not, as Zeydlitz says it leads to a heavy slowdown. --arcum42
|
|
||||||
|
|
||||||
// Note: this often happens when changing AA.
|
|
||||||
int disalignment = 16 - ((u32)(uptr)dst) % 16; // This is value of shift. It could be 0 < disalignment <= 15
|
|
||||||
ptexdata = &texdata[disalignment]; // Set pointer to aligned element
|
|
||||||
dst = (u16*)ptexdata;
|
|
||||||
ZZLog::GS_Log("Made alignment for texdata, 0x%x", dst);
|
|
||||||
assert(((u32)(uptr)dst) % 16 == 0); // Assert, because at future could be vectors with uncontigious spaces
|
|
||||||
}
|
|
||||||
|
|
||||||
int iters = targ->height * GPU_TEXWIDTH / 16;
|
|
||||||
|
|
||||||
SSE2_UnswizzleZ16Target(dst, src, iters) ;
|
|
||||||
#else // ZEROGS_SSE2
|
#else // ZEROGS_SSE2
|
||||||
|
|
||||||
for (int i = 0; i < targ->height; ++i)
|
for (int i = 0; i < targ->height; ++i)
|
||||||
|
@ -2288,6 +2261,8 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ptexdata = targ->ptex->memptr;
|
ptexdata = targ->ptex->memptr;
|
||||||
|
// We really don't want to deallocate memptr. As a reminder...
|
||||||
|
has_data = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2320,6 +2295,7 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info
|
||||||
{
|
{
|
||||||
ZZLog::Error_Log("Failed to create %dx%x texture.", GPU_TEXWIDTH*channels*widthmult, (realheight + widthmult - 1) / widthmult);
|
ZZLog::Error_Log("Failed to create %dx%x texture.", GPU_TEXWIDTH*channels*widthmult, (realheight + widthmult - 1) / widthmult);
|
||||||
channels = 1;
|
channels = 1;
|
||||||
|
if (has_data) _aligned_free(ptexdata);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2330,6 +2306,7 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info
|
||||||
}
|
}
|
||||||
|
|
||||||
setRectWrap(GL_CLAMP);
|
setRectWrap(GL_CLAMP);
|
||||||
|
if (has_data) _aligned_free(ptexdata);
|
||||||
|
|
||||||
assert(tex0.psm != 0xd);
|
assert(tex0.psm != 0xd);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue