From 4d5f4ad81b92b067d9bfe883c348acbe5352bcec Mon Sep 17 00:00:00 2001 From: arcum42 Date: Sat, 18 Sep 2010 19:27:11 +0000 Subject: [PATCH] GregMiscellaneous: zzogl-pg: Use _aligned_malloc in GetMemoryTarget. git-svn-id: http://pcsx2.googlecode.com/svn/branches/GregMiscellaneous@3800 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/zzogl-pg/opengl/HostMemory.cpp | 5 ++-- plugins/zzogl-pg/opengl/Mem_Tables.cpp | 30 +++++++++---------- plugins/zzogl-pg/opengl/targets.cpp | 40 ++++---------------------- 3 files changed, 23 insertions(+), 52 deletions(-) diff --git a/plugins/zzogl-pg/opengl/HostMemory.cpp b/plugins/zzogl-pg/opengl/HostMemory.cpp index 9778f8a9c4..889423bd53 100644 --- a/plugins/zzogl-pg/opengl/HostMemory.cpp +++ b/plugins/zzogl-pg/opengl/HostMemory.cpp @@ -78,13 +78,12 @@ static vector s_vTempBuffer, s_vTransferCache; static int gs_imageEnd = 0; - + // From the start of monster labs. In all 3 cases, psm == 0. // ZZogl-PG: GetRectMemAddress(0x3f4000, 0x404000, 0x0, 0x0, 0x0, 0x100, 0x40, 0x3f40, 0x100); // ZZogl-PG: GetRectMemAddress(0x3f8000, 0x408000, 0x0, 0x0, 0x0, 0x100, 0x40, 0x3f80, 0x100); // ZZogl-PG: GetRectMemAddress(0x3fc000, 0x40c000, 0x0, 0x0, 0x0, 0x100, 0x40, 0x3fc0, 0x100); - void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h, int bp, int bw) { FUNCLOG @@ -114,7 +113,7 @@ bits = PSMT_BITS_NUM(psm); start = getPixelFun[psm](x, y, bp, bw); end = getPixelFun[psm](x + w - 1, y + h - 1, bp, bw) + 1; - + if (bits > 0) { start *= bits; diff --git a/plugins/zzogl-pg/opengl/Mem_Tables.cpp b/plugins/zzogl-pg/opengl/Mem_Tables.cpp index 6c66a9389c..8db1b5c67c 100644 --- a/plugins/zzogl-pg/opengl/Mem_Tables.cpp +++ b/plugins/zzogl-pg/opengl/Mem_Tables.cpp @@ -120,9 +120,9 @@ u32 g_columnTable32[8][8] = u32 g_columnTable16[8][16] = { { 0, 2, 8, 10, 16, 18, 24, 26, - 1, 3, 9, 11, 17, 19, 25, 27 }, + 1, 3, 9, 11, 17, 19, 25, 27 }, { 4, 6, 12, 14, 20, 22, 28, 30, - 5, 7, 13, 15, 21, 23, 29, 31 }, + 5, 7, 13, 15, 21, 23, 29, 31 }, { 32, 34, 40, 42, 48, 50, 56, 58, 33, 35, 41, 43, 49, 51, 57, 59 }, { 36, 38, 44, 46, 52, 54, 60, 62, @@ -139,15 +139,15 @@ u32 g_columnTable16[8][16] = u32 g_columnTable8[16][16] = { - { 0, 4, 16, 20, 32, 36, 48, 52, // column 0 - 2, 6, 18, 22, 34, 38, 50, 54 }, + { 0, 4, 16, 20, 32, 36, 48, 52, // column 0 + 2, 6, 18, 22, 34, 38, 50, 54 }, { 8, 12, 24, 28, 40, 44, 56, 60, - 10, 14, 26, 30, 42, 46, 58, 62 }, + 10, 14, 26, 30, 42, 46, 58, 62 }, { 33, 37, 49, 53, 1, 5, 17, 21, 35, 39, 51, 55, 3, 7, 19, 23 }, { 41, 45, 57, 61, 9, 13, 25, 29, 43, 47, 59, 63, 11, 15, 27, 31 }, - { 96, 100, 112, 116, 64, 68, 80, 84, // column 1 + { 96, 100, 112, 116, 64, 68, 80, 84, // column 1 98, 102, 114, 118, 66, 70, 82, 86 }, { 104, 108, 120, 124, 72, 76, 88, 92, 106, 110, 122, 126, 74, 78, 90, 94 }, @@ -155,7 +155,7 @@ u32 g_columnTable8[16][16] = 67, 71, 83, 87, 99, 103, 115, 119 }, { 73, 77, 89, 93, 105, 109, 121, 125, 75, 79, 91, 95, 107, 111, 123, 127 }, - { 128, 132, 144, 148, 160, 164, 176, 180, // column 2 + { 128, 132, 144, 148, 160, 164, 176, 180, // column 2 130, 134, 146, 150, 162, 166, 178, 182 }, { 136, 140, 152, 156, 168, 172, 184, 188, 138, 142, 154, 158, 170, 174, 186, 190 }, @@ -163,7 +163,7 @@ u32 g_columnTable8[16][16] = 163, 167, 179, 183, 131, 135, 147, 151 }, { 169, 173, 185, 189, 137, 141, 153, 157, 171, 175, 187, 191, 139, 143, 155, 159 }, - { 224, 228, 240, 244, 192, 196, 208, 212, // column 3 + { 224, 228, 240, 244, 192, 196, 208, 212, // column 3 226, 230, 242, 246, 194, 198, 210, 214 }, { 232, 236, 248, 252, 200, 204, 216, 220, 234, 238, 250, 254, 202, 206, 218, 222 }, @@ -175,10 +175,10 @@ u32 g_columnTable8[16][16] = u32 g_columnTable4[16][32] = { - { 0, 8, 32, 40, 64, 72, 96, 104, // column 0 - 2, 10, 34, 42, 66, 74, 98, 106, - 4, 12, 36, 44, 68, 76, 100, 108, - 6, 14, 38, 46, 70, 78, 102, 110 }, + { 0, 8, 32, 40, 64, 72, 96, 104, // column 0 + 2, 10, 34, 42, 66, 74, 98, 106, + 4, 12, 36, 44, 68, 76, 100, 108, + 6, 14, 38, 46, 70, 78, 102, 110 }, { 16, 24, 48, 56, 80, 88, 112, 120, 18, 26, 50, 58, 82, 90, 114, 122, 20, 28, 52, 60, 84, 92, 116, 124, @@ -191,7 +191,7 @@ u32 g_columnTable4[16][32] = 83, 91, 115, 123, 19, 27, 51, 59, 85, 93, 117, 125, 21, 29, 53, 61, 87, 95, 119, 127, 23, 31, 55, 63 }, - { 192, 200, 224, 232, 128, 136, 160, 168, // column 1 + { 192, 200, 224, 232, 128, 136, 160, 168, // column 1 194, 202, 226, 234, 130, 138, 162, 170, 196, 204, 228, 236, 132, 140, 164, 172, 198, 206, 230, 238, 134, 142, 166, 174 }, @@ -207,7 +207,7 @@ u32 g_columnTable4[16][32] = 147, 155, 179, 187, 211, 219, 243, 251, 149, 157, 181, 189, 213, 221, 245, 253, 151, 159, 183, 191, 215, 223, 247, 255 }, - { 256, 264, 288, 296, 320, 328, 352, 360, // column 2 + { 256, 264, 288, 296, 320, 328, 352, 360, // column 2 258, 266, 290, 298, 322, 330, 354, 362, 260, 268, 292, 300, 324, 332, 356, 364, 262, 270, 294, 302, 326, 334, 358, 366 }, @@ -223,7 +223,7 @@ u32 g_columnTable4[16][32] = 339, 347, 371, 379, 275, 283, 307, 315, 341, 349, 373, 381, 277, 285, 309, 317, 343, 351, 375, 383, 279, 287, 311, 319 }, - { 448, 456, 480, 488, 384, 392, 416, 424, // column 3 + { 448, 456, 480, 488, 384, 392, 416, 424, // column 3 450, 458, 482, 490, 386, 394, 418, 426, 452, 460, 484, 492, 388, 396, 420, 428, 454, 462, 486, 494, 390, 398, 422, 430 }, diff --git a/plugins/zzogl-pg/opengl/targets.cpp b/plugins/zzogl-pg/opengl/targets.cpp index 97b96cc870..83268278c1 100644 --- a/plugins/zzogl-pg/opengl/targets.cpp +++ b/plugins/zzogl-pg/opengl/targets.cpp @@ -2202,13 +2202,12 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info memcpy_amd(targ->ptex->memptr, g_pbyGSMemory + 4 * GPU_TEXWIDTH * targ->realy, 4 * GPU_TEXWIDTH * targ->height); - vector texdata; u8* ptexdata = NULL; if (PSMT_ISCLUT(tex0.psm)) { - texdata.resize(((tex0.cpsm <= 1) ? 4 : 2) * texW * texH); - ptexdata = &texdata[0]; + u32 tex_size = ((tex0.cpsm <= 1) ? 4 : 2) * texW * texH; + ptexdata = (u8*)_aligned_malloc(tex_size, 16); u8* psrc = (u8*)(g_pbyGSMemory + 4 * GPU_TEXWIDTH * targ->realy); @@ -2231,43 +2230,16 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info { if (tex0.psm == PSMT16Z || tex0.psm == PSMT16SZ) { - texdata.resize(4 * texW * texH -#if defined(ZEROGS_SSE2) - + 15 // reserve additional elements for alignment if SSE2 used. - // better do it now, so less resizing would be needed -#endif - ); - - ptexdata = &texdata[0]; - + ptexdata = (u8*)_aligned_malloc(4 * texW * texH, 16); + // needs to be 8 bit, use xmm for unpacking u16* dst = (u16*)ptexdata; u16* src = (u16*)(g_pbyGSMemory + 4 * GPU_TEXWIDTH * targ->realy); #if defined(ZEROGS_SSE2) - - if (((u32)(uptr)dst) % 16 != 0) - { - // This is not unusual situation, when vector does not 16bit alignment, that is destructive for SSE2 - // instruction movdqa [%eax], xmm0 - // The idea would be resize vector to 15 elements, that set ptxedata to aligned position. - // Later we would move eax by 16, so only we should verify is first element align - // FIXME. As I see, texdata used only once here, it does not have any impact on other code. - // Probably, usage of _aligned_maloc() would be preferable. - - // Note: this often happens when changing AA. - int disalignment = 16 - ((u32)(uptr)dst) % 16; // This is value of shift. It could be 0 < disalignment <= 15 - ptexdata = &texdata[disalignment]; // Set pointer to aligned element - dst = (u16*)ptexdata; - ZZLog::GS_Log("Made alignment for texdata, 0x%x", dst); - assert(((u32)(uptr)dst) % 16 == 0); // Assert, because at future could be vectors with uncontigious spaces - } - - int iters = targ->height * GPU_TEXWIDTH / 16; - - SSE2_UnswizzleZ16Target(dst, src, iters) ; + assert(((u32)(uptr)dst) % 16 == 0); + SSE2_UnswizzleZ16Target(dst, src, targ->height * GPU_TEXWIDTH / 16); #else // ZEROGS_SSE2 - for (int i = 0; i < targ->height; ++i) { for (int j = 0; j < GPU_TEXWIDTH; ++j)