mirror of https://github.com/PCSX2/pcsx2.git
GregMiscellaneous: zzogl-pg: Use _aligned_malloc in GetMemoryTarget.
git-svn-id: http://pcsx2.googlecode.com/svn/branches/GregMiscellaneous@3800 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
7afdf9e7c7
commit
4d5f4ad81b
|
@ -78,13 +78,12 @@
|
|||
|
||||
static vector<u8> s_vTempBuffer, s_vTransferCache;
|
||||
static int gs_imageEnd = 0;
|
||||
|
||||
|
||||
// From the start of monster labs. In all 3 cases, psm == 0.
|
||||
// ZZogl-PG: GetRectMemAddress(0x3f4000, 0x404000, 0x0, 0x0, 0x0, 0x100, 0x40, 0x3f40, 0x100);
|
||||
// ZZogl-PG: GetRectMemAddress(0x3f8000, 0x408000, 0x0, 0x0, 0x0, 0x100, 0x40, 0x3f80, 0x100);
|
||||
// ZZogl-PG: GetRectMemAddress(0x3fc000, 0x40c000, 0x0, 0x0, 0x0, 0x100, 0x40, 0x3fc0, 0x100);
|
||||
|
||||
|
||||
void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h, int bp, int bw)
|
||||
{
|
||||
FUNCLOG
|
||||
|
@ -114,7 +113,7 @@
|
|||
bits = PSMT_BITS_NUM(psm);
|
||||
start = getPixelFun[psm](x, y, bp, bw);
|
||||
end = getPixelFun[psm](x + w - 1, y + h - 1, bp, bw) + 1;
|
||||
|
||||
|
||||
if (bits > 0)
|
||||
{
|
||||
start *= bits;
|
||||
|
|
|
@ -120,9 +120,9 @@ u32 g_columnTable32[8][8] =
|
|||
u32 g_columnTable16[8][16] =
|
||||
{
|
||||
{ 0, 2, 8, 10, 16, 18, 24, 26,
|
||||
1, 3, 9, 11, 17, 19, 25, 27 },
|
||||
1, 3, 9, 11, 17, 19, 25, 27 },
|
||||
{ 4, 6, 12, 14, 20, 22, 28, 30,
|
||||
5, 7, 13, 15, 21, 23, 29, 31 },
|
||||
5, 7, 13, 15, 21, 23, 29, 31 },
|
||||
{ 32, 34, 40, 42, 48, 50, 56, 58,
|
||||
33, 35, 41, 43, 49, 51, 57, 59 },
|
||||
{ 36, 38, 44, 46, 52, 54, 60, 62,
|
||||
|
@ -139,15 +139,15 @@ u32 g_columnTable16[8][16] =
|
|||
|
||||
u32 g_columnTable8[16][16] =
|
||||
{
|
||||
{ 0, 4, 16, 20, 32, 36, 48, 52, // column 0
|
||||
2, 6, 18, 22, 34, 38, 50, 54 },
|
||||
{ 0, 4, 16, 20, 32, 36, 48, 52, // column 0
|
||||
2, 6, 18, 22, 34, 38, 50, 54 },
|
||||
{ 8, 12, 24, 28, 40, 44, 56, 60,
|
||||
10, 14, 26, 30, 42, 46, 58, 62 },
|
||||
10, 14, 26, 30, 42, 46, 58, 62 },
|
||||
{ 33, 37, 49, 53, 1, 5, 17, 21,
|
||||
35, 39, 51, 55, 3, 7, 19, 23 },
|
||||
{ 41, 45, 57, 61, 9, 13, 25, 29,
|
||||
43, 47, 59, 63, 11, 15, 27, 31 },
|
||||
{ 96, 100, 112, 116, 64, 68, 80, 84, // column 1
|
||||
{ 96, 100, 112, 116, 64, 68, 80, 84, // column 1
|
||||
98, 102, 114, 118, 66, 70, 82, 86 },
|
||||
{ 104, 108, 120, 124, 72, 76, 88, 92,
|
||||
106, 110, 122, 126, 74, 78, 90, 94 },
|
||||
|
@ -155,7 +155,7 @@ u32 g_columnTable8[16][16] =
|
|||
67, 71, 83, 87, 99, 103, 115, 119 },
|
||||
{ 73, 77, 89, 93, 105, 109, 121, 125,
|
||||
75, 79, 91, 95, 107, 111, 123, 127 },
|
||||
{ 128, 132, 144, 148, 160, 164, 176, 180, // column 2
|
||||
{ 128, 132, 144, 148, 160, 164, 176, 180, // column 2
|
||||
130, 134, 146, 150, 162, 166, 178, 182 },
|
||||
{ 136, 140, 152, 156, 168, 172, 184, 188,
|
||||
138, 142, 154, 158, 170, 174, 186, 190 },
|
||||
|
@ -163,7 +163,7 @@ u32 g_columnTable8[16][16] =
|
|||
163, 167, 179, 183, 131, 135, 147, 151 },
|
||||
{ 169, 173, 185, 189, 137, 141, 153, 157,
|
||||
171, 175, 187, 191, 139, 143, 155, 159 },
|
||||
{ 224, 228, 240, 244, 192, 196, 208, 212, // column 3
|
||||
{ 224, 228, 240, 244, 192, 196, 208, 212, // column 3
|
||||
226, 230, 242, 246, 194, 198, 210, 214 },
|
||||
{ 232, 236, 248, 252, 200, 204, 216, 220,
|
||||
234, 238, 250, 254, 202, 206, 218, 222 },
|
||||
|
@ -175,10 +175,10 @@ u32 g_columnTable8[16][16] =
|
|||
|
||||
u32 g_columnTable4[16][32] =
|
||||
{
|
||||
{ 0, 8, 32, 40, 64, 72, 96, 104, // column 0
|
||||
2, 10, 34, 42, 66, 74, 98, 106,
|
||||
4, 12, 36, 44, 68, 76, 100, 108,
|
||||
6, 14, 38, 46, 70, 78, 102, 110 },
|
||||
{ 0, 8, 32, 40, 64, 72, 96, 104, // column 0
|
||||
2, 10, 34, 42, 66, 74, 98, 106,
|
||||
4, 12, 36, 44, 68, 76, 100, 108,
|
||||
6, 14, 38, 46, 70, 78, 102, 110 },
|
||||
{ 16, 24, 48, 56, 80, 88, 112, 120,
|
||||
18, 26, 50, 58, 82, 90, 114, 122,
|
||||
20, 28, 52, 60, 84, 92, 116, 124,
|
||||
|
@ -191,7 +191,7 @@ u32 g_columnTable4[16][32] =
|
|||
83, 91, 115, 123, 19, 27, 51, 59,
|
||||
85, 93, 117, 125, 21, 29, 53, 61,
|
||||
87, 95, 119, 127, 23, 31, 55, 63 },
|
||||
{ 192, 200, 224, 232, 128, 136, 160, 168, // column 1
|
||||
{ 192, 200, 224, 232, 128, 136, 160, 168, // column 1
|
||||
194, 202, 226, 234, 130, 138, 162, 170,
|
||||
196, 204, 228, 236, 132, 140, 164, 172,
|
||||
198, 206, 230, 238, 134, 142, 166, 174 },
|
||||
|
@ -207,7 +207,7 @@ u32 g_columnTable4[16][32] =
|
|||
147, 155, 179, 187, 211, 219, 243, 251,
|
||||
149, 157, 181, 189, 213, 221, 245, 253,
|
||||
151, 159, 183, 191, 215, 223, 247, 255 },
|
||||
{ 256, 264, 288, 296, 320, 328, 352, 360, // column 2
|
||||
{ 256, 264, 288, 296, 320, 328, 352, 360, // column 2
|
||||
258, 266, 290, 298, 322, 330, 354, 362,
|
||||
260, 268, 292, 300, 324, 332, 356, 364,
|
||||
262, 270, 294, 302, 326, 334, 358, 366 },
|
||||
|
@ -223,7 +223,7 @@ u32 g_columnTable4[16][32] =
|
|||
339, 347, 371, 379, 275, 283, 307, 315,
|
||||
341, 349, 373, 381, 277, 285, 309, 317,
|
||||
343, 351, 375, 383, 279, 287, 311, 319 },
|
||||
{ 448, 456, 480, 488, 384, 392, 416, 424, // column 3
|
||||
{ 448, 456, 480, 488, 384, 392, 416, 424, // column 3
|
||||
450, 458, 482, 490, 386, 394, 418, 426,
|
||||
452, 460, 484, 492, 388, 396, 420, 428,
|
||||
454, 462, 486, 494, 390, 398, 422, 430 },
|
||||
|
|
|
@ -2202,13 +2202,12 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info
|
|||
|
||||
memcpy_amd(targ->ptex->memptr, g_pbyGSMemory + 4 * GPU_TEXWIDTH * targ->realy, 4 * GPU_TEXWIDTH * targ->height);
|
||||
|
||||
vector<u8> texdata;
|
||||
u8* ptexdata = NULL;
|
||||
|
||||
if (PSMT_ISCLUT(tex0.psm))
|
||||
{
|
||||
texdata.resize(((tex0.cpsm <= 1) ? 4 : 2) * texW * texH);
|
||||
ptexdata = &texdata[0];
|
||||
u32 tex_size = ((tex0.cpsm <= 1) ? 4 : 2) * texW * texH;
|
||||
ptexdata = (u8*)_aligned_malloc(tex_size, 16);
|
||||
|
||||
u8* psrc = (u8*)(g_pbyGSMemory + 4 * GPU_TEXWIDTH * targ->realy);
|
||||
|
||||
|
@ -2231,43 +2230,16 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info
|
|||
{
|
||||
if (tex0.psm == PSMT16Z || tex0.psm == PSMT16SZ)
|
||||
{
|
||||
texdata.resize(4 * texW * texH
|
||||
#if defined(ZEROGS_SSE2)
|
||||
+ 15 // reserve additional elements for alignment if SSE2 used.
|
||||
// better do it now, so less resizing would be needed
|
||||
#endif
|
||||
);
|
||||
|
||||
ptexdata = &texdata[0];
|
||||
|
||||
ptexdata = (u8*)_aligned_malloc(4 * texW * texH, 16);
|
||||
|
||||
// needs to be 8 bit, use xmm for unpacking
|
||||
u16* dst = (u16*)ptexdata;
|
||||
u16* src = (u16*)(g_pbyGSMemory + 4 * GPU_TEXWIDTH * targ->realy);
|
||||
|
||||
#if defined(ZEROGS_SSE2)
|
||||
|
||||
if (((u32)(uptr)dst) % 16 != 0)
|
||||
{
|
||||
// It is not unusual for a vector<u8> buffer to lack 16-byte alignment, which is destructive for the SSE2
|
||||
// instruction movdqa [%eax], xmm0
|
||||
// The idea is to reserve 15 extra elements in the vector, so that ptexdata can be moved to an aligned position.
|
||||
// Later we advance eax in steps of 16, so we only need to verify that the first element is aligned.
|
||||
// FIXME: As far as I can see, texdata is used only once here, so it does not have any impact on other code.
|
||||
// Using _aligned_malloc() would probably be preferable.
|
||||
|
||||
// Note: this often happens when changing AA.
|
||||
int disalignment = 16 - ((u32)(uptr)dst) % 16; // This is value of shift. It could be 0 < disalignment <= 15
|
||||
ptexdata = &texdata[disalignment]; // Set pointer to aligned element
|
||||
dst = (u16*)ptexdata;
|
||||
ZZLog::GS_Log("Made alignment for texdata, 0x%x", dst);
|
||||
assert(((u32)(uptr)dst) % 16 == 0); // Assert, because at future could be vectors with uncontigious spaces
|
||||
}
|
||||
|
||||
int iters = targ->height * GPU_TEXWIDTH / 16;
|
||||
|
||||
SSE2_UnswizzleZ16Target(dst, src, iters) ;
|
||||
assert(((u32)(uptr)dst) % 16 == 0);
|
||||
SSE2_UnswizzleZ16Target(dst, src, targ->height * GPU_TEXWIDTH / 16);
|
||||
#else // ZEROGS_SSE2
|
||||
|
||||
for (int i = 0; i < targ->height; ++i)
|
||||
{
|
||||
for (int j = 0; j < GPU_TEXWIDTH; ++j)
|
||||
|
|
Loading…
Reference in New Issue