/* ZeroGS * Copyright (C) 2005-2006 zerofrog@gmail.com * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ #include #include "GS.h" #include "Mem.h" #include "zerogs.h" #include "targets.h" #include "x86.h" u32 g_blockTable32[4][8] = { { 0, 1, 4, 5, 16, 17, 20, 21}, { 2, 3, 6, 7, 18, 19, 22, 23}, { 8, 9, 12, 13, 24, 25, 28, 29}, { 10, 11, 14, 15, 26, 27, 30, 31} }; u32 g_blockTable32Z[4][8] = { { 24, 25, 28, 29, 8, 9, 12, 13}, { 26, 27, 30, 31, 10, 11, 14, 15}, { 16, 17, 20, 21, 0, 1, 4, 5}, { 18, 19, 22, 23, 2, 3, 6, 7} }; u32 g_blockTable16[8][4] = { { 0, 2, 8, 10 }, { 1, 3, 9, 11 }, { 4, 6, 12, 14 }, { 5, 7, 13, 15 }, { 16, 18, 24, 26 }, { 17, 19, 25, 27 }, { 20, 22, 28, 30 }, { 21, 23, 29, 31 } }; u32 g_blockTable16S[8][4] = { { 0, 2, 16, 18 }, { 1, 3, 17, 19 }, { 8, 10, 24, 26 }, { 9, 11, 25, 27 }, { 4, 6, 20, 22 }, { 5, 7, 21, 23 }, { 12, 14, 28, 30 }, { 13, 15, 29, 31 } }; u32 g_blockTable16Z[8][4] = { { 24, 26, 16, 18 }, { 25, 27, 17, 19 }, { 28, 30, 20, 22 }, { 29, 31, 21, 23 }, { 8, 10, 0, 2 }, { 9, 11, 1, 3 }, { 12, 14, 4, 6 }, { 13, 15, 5, 7 } }; u32 g_blockTable16SZ[8][4] = { { 24, 26, 8, 10 }, { 25, 27, 9, 11 }, { 16, 18, 0, 2 }, { 17, 19, 1, 3 }, { 28, 30, 12, 14 }, { 29, 31, 13, 15 }, { 20, 22, 4, 6 }, { 21, 23, 5, 7 } }; u32 g_blockTable8[4][8] = { { 0, 1, 4, 5, 16, 17, 20, 21}, { 2, 3, 6, 7, 18, 19, 22, 23}, { 8, 9, 12, 13, 24, 25, 28, 29}, { 10, 11, 14, 15, 26, 27, 30, 31} }; u32 g_blockTable4[8][4] = { { 0, 2, 8, 10 }, { 1, 3, 9, 11 }, { 4, 6, 12, 14 }, { 5, 7, 13, 15 }, { 16, 18, 24, 26 }, { 17, 19, 25, 27 }, { 20, 22, 28, 30 }, { 21, 23, 29, 31 } }; u32 g_columnTable32[8][8] = { { 0, 1, 4, 5, 8, 9, 12, 13 }, { 2, 3, 6, 7, 10, 11, 14, 15 }, { 16, 17, 20, 21, 24, 25, 28, 29 }, { 18, 19, 22, 23, 26, 27, 30, 31 }, { 32, 33, 36, 37, 40, 41, 44, 45 }, { 34, 35, 38, 39, 42, 43, 46, 47 }, { 48, 49, 52, 53, 56, 57, 60, 61 }, { 50, 51, 54, 55, 58, 59, 62, 63 }, }; u32 g_columnTable16[8][16] = { { 0, 2, 8, 10, 16, 18, 24, 26, 1, 3, 9, 11, 17, 19, 25, 27 }, { 4, 6, 12, 14, 20, 22, 28, 30, 5, 7, 13, 15, 21, 23, 29, 31 }, { 32, 34, 40, 42, 48, 50, 56, 58, 33, 35, 41, 43, 49, 51, 57, 59 }, { 36, 38, 44, 46, 52, 54, 60, 62, 37, 39, 45, 47, 53, 55, 61, 63 }, { 64, 66, 72, 74, 80, 82, 88, 90, 65, 67, 73, 75, 81, 83, 89, 91 }, { 68, 70, 76, 78, 84, 86, 92, 94, 69, 71, 77, 79, 85, 87, 93, 95 }, { 96, 98, 104, 106, 112, 114, 120, 122, 97, 99, 105, 107, 113, 115, 121, 123 }, { 100, 102, 108, 110, 116, 118, 124, 126, 101, 103, 109, 111, 117, 119, 125, 127 }, }; u32 g_columnTable8[16][16] = { { 0, 4, 16, 20, 32, 36, 48, 52, // column 0 2, 6, 18, 22, 34, 38, 50, 54 }, { 8, 12, 24, 28, 40, 44, 56, 60, 10, 14, 26, 30, 42, 46, 58, 62 }, { 33, 37, 49, 53, 1, 5, 17, 21, 35, 39, 51, 55, 3, 7, 19, 23 }, { 41, 45, 57, 61, 9, 13, 25, 29, 43, 47, 59, 63, 11, 15, 27, 31 }, { 96, 100, 112, 116, 64, 68, 80, 84, // column 1 98, 102, 114, 118, 66, 70, 82, 86 }, { 104, 108, 120, 124, 72, 76, 88, 92, 106, 110, 122, 126, 74, 78, 90, 94 }, { 65, 69, 81, 85, 97, 101, 113, 117, 67, 71, 83, 87, 99, 103, 115, 119 }, { 73, 77, 89, 93, 105, 109, 121, 125, 75, 79, 91, 95, 107, 111, 123, 127 }, { 128, 132, 144, 148, 160, 164, 176, 180, // column 2 130, 134, 146, 150, 162, 166, 178, 182 }, { 136, 140, 152, 156, 168, 172, 184, 188, 138, 142, 154, 158, 170, 174, 186, 190 }, { 161, 165, 177, 181, 129, 133, 145, 149, 163, 167, 179, 183, 131, 135, 147, 151 }, { 169, 173, 185, 189, 137, 141, 153, 157, 171, 175, 187, 191, 139, 143, 155, 159 }, { 224, 228, 240, 244, 192, 196, 208, 212, // column 3 226, 230, 242, 246, 194, 198, 210, 214 }, { 232, 236, 248, 252, 200, 204, 216, 220, 234, 238, 250, 254, 202, 206, 218, 222 }, { 193, 197, 209, 213, 225, 229, 241, 245, 195, 199, 211, 215, 227, 231, 243, 247 }, { 201, 205, 217, 221, 233, 237, 249, 253, 203, 207, 219, 223, 235, 239, 251, 255 }, }; u32 g_columnTable4[16][32] = { { 0, 8, 32, 40, 64, 72, 96, 104, // column 0 2, 10, 34, 42, 66, 74, 98, 106, 4, 12, 36, 44, 68, 76, 100, 108, 6, 14, 38, 46, 70, 78, 102, 110 }, { 16, 24, 48, 56, 80, 88, 112, 120, 18, 26, 50, 58, 82, 90, 114, 122, 20, 28, 52, 60, 84, 92, 116, 124, 22, 30, 54, 62, 86, 94, 118, 126 }, { 65, 73, 97, 105, 1, 9, 33, 41, 67, 75, 99, 107, 3, 11, 35, 43, 69, 77, 101, 109, 5, 13, 37, 45, 71, 79, 103, 111, 7, 15, 39, 47 }, { 81, 89, 113, 121, 17, 25, 49, 57, 83, 91, 115, 123, 19, 27, 51, 59, 85, 93, 117, 125, 21, 29, 53, 61, 87, 95, 119, 127, 23, 31, 55, 63 }, { 192, 200, 224, 232, 128, 136, 160, 168, // column 1 194, 202, 226, 234, 130, 138, 162, 170, 196, 204, 228, 236, 132, 140, 164, 172, 198, 206, 230, 238, 134, 142, 166, 174 }, { 208, 216, 240, 248, 144, 152, 176, 184, 210, 218, 242, 250, 146, 154, 178, 186, 212, 220, 244, 252, 148, 156, 180, 188, 214, 222, 246, 254, 150, 158, 182, 190 }, { 129, 137, 161, 169, 193, 201, 225, 233, 131, 139, 163, 171, 195, 203, 227, 235, 133, 141, 165, 173, 197, 205, 229, 237, 135, 143, 167, 175, 199, 207, 231, 239 }, { 145, 153, 177, 185, 209, 217, 241, 249, 147, 155, 179, 187, 211, 219, 243, 251, 149, 157, 181, 189, 213, 221, 245, 253, 151, 159, 183, 191, 215, 223, 247, 255 }, { 256, 264, 288, 296, 320, 328, 352, 360, // column 2 258, 266, 290, 298, 322, 330, 354, 362, 260, 268, 292, 300, 324, 332, 356, 364, 262, 270, 294, 302, 326, 334, 358, 366 }, { 272, 280, 304, 312, 336, 344, 368, 376, 274, 282, 306, 314, 338, 346, 370, 378, 276, 284, 308, 316, 340, 348, 372, 380, 278, 286, 310, 318, 342, 350, 374, 382 }, { 321, 329, 353, 361, 257, 265, 289, 297, 323, 331, 355, 363, 259, 267, 291, 299, 325, 333, 357, 365, 261, 269, 293, 301, 327, 335, 359, 367, 263, 271, 295, 303 }, { 337, 345, 369, 377, 273, 281, 305, 313, 339, 347, 371, 379, 275, 283, 307, 315, 341, 349, 373, 381, 277, 285, 309, 317, 343, 351, 375, 383, 279, 287, 311, 319 }, { 448, 456, 480, 488, 384, 392, 416, 424, // column 3 450, 458, 482, 490, 386, 394, 418, 426, 452, 460, 484, 492, 388, 396, 420, 428, 454, 462, 486, 494, 390, 398, 422, 430 }, { 464, 472, 496, 504, 400, 408, 432, 440, 466, 474, 498, 506, 402, 410, 434, 442, 468, 476, 500, 508, 404, 412, 436, 444, 470, 478, 502, 510, 406, 414, 438, 446 }, { 385, 393, 417, 425, 449, 457, 481, 489, 387, 395, 419, 427, 451, 459, 483, 491, 389, 397, 421, 429, 453, 461, 485, 493, 391, 399, 423, 431, 455, 463, 487, 495 }, { 401, 409, 433, 441, 465, 473, 497, 505, 403, 411, 435, 443, 467, 475, 499, 507, 405, 413, 437, 445, 469, 477, 501, 509, 407, 415, 439, 447, 471, 479, 503, 511 }, }; u32 g_pageTable32[32][64]; u32 g_pageTable32Z[32][64]; u32 g_pageTable16[64][64]; u32 g_pageTable16S[64][64]; u32 g_pageTable16Z[64][64]; u32 g_pageTable16SZ[64][64]; u32 g_pageTable8[64][128]; u32 g_pageTable4[128][128]; BLOCK m_Blocks[0x40]; // do so blocks are indexable static __aligned16 u32 tempblock[64]; #define DSTPSM gs.dstbuf.psm #define START_HOSTLOCAL() \ assert( gs.imageTransfer == 0 ); \ u8* pstart = ZeroGS::g_pbyGSMemory + gs.dstbuf.bp*256; \ \ const u8* pendbuf = (const u8*)pbyMem + nQWordSize*4; \ int i = gs.imageY, j = gs.imageX; \ extern BOOL g_bSaveTrans; #define END_HOSTLOCAL() \ End: \ if( i >= gs.imageEndY ) { \ assert( gs.imageTransfer == -1 || i == gs.imageEndY ); \ gs.imageTransfer = -1; \ /*int start, end; \ ZeroGS::GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw); \ ZeroGS::g_MemTargs.ClearRange(start, end);*/ \ } \ else { \ /* update new params */ \ gs.imageY = i; \ gs.imageX = j; \ } \ // transfers whole rows #define TRANSMIT_HOSTLOCAL_Y_(psm, T, widthlimit, endY) { \ assert( (nSize%widthlimit) == 0 && widthlimit <= 4 ); \ if( (gs.imageEndX-gs.trxpos.dx)%widthlimit ) { \ /*GS_LOG("Bad Transmission! %d %d, psm: %d\n", gs.trxpos.dx, gs.imageEndX, DSTPSM);*/ \ for(; i < endY; ++i) { \ for(; j < gs.imageEndX && nSize > 0; j += 1, nSize -= 1, pbuf += 1) { \ /* write as many pixel at one time as possible */ \ writePixel##psm##_0(pstart, j%2048, i%2048, pbuf[0], gs.dstbuf.bw); \ } \ } \ } \ for(; i < endY; ++i) { \ for(; j < gs.imageEndX && nSize > 0; j += widthlimit, nSize -= widthlimit, pbuf += widthlimit) { \ /* write as many pixel at one time as possible */ \ if( nSize < widthlimit ) goto End; \ writePixel##psm##_0(pstart, j%2048, i%2048, pbuf[0], gs.dstbuf.bw); \ \ if( widthlimit > 1 ) { \ writePixel##psm##_0(pstart, (j+1)%2048, i%2048, pbuf[1], gs.dstbuf.bw); \ \ if( widthlimit > 2 ) { \ writePixel##psm##_0(pstart, (j+2)%2048, i%2048, pbuf[2], gs.dstbuf.bw); \ \ if( widthlimit > 3 ) { \ writePixel##psm##_0(pstart, (j+3)%2048, i%2048, pbuf[3], gs.dstbuf.bw); \ } \ } \ } \ } \ \ if( j >= gs.imageEndX ) { assert(j == gs.imageEndX); j = gs.trxpos.dx; } \ else { assert( gs.imageTransfer == -1 || nSize*sizeof(T)/4 == 0 ); goto End; } \ } \ } \ // transmit until endX, don't check size since it has already been prevalidated #define TRANSMIT_HOSTLOCAL_X_(psm, T, widthlimit, blockheight, startX) { \ for(int tempi = 0; tempi < blockheight; ++tempi) { \ for(j = startX; j < gs.imageEndX; j++, pbuf++) { \ writePixel##psm##_0(pstart, j%2048, (i+tempi)%2048, pbuf[0], gs.dstbuf.bw); \ } \ pbuf += pitch-fracX; \ } \ } \ // transfers whole rows #define TRANSMIT_HOSTLOCAL_Y_24(psm, T, widthlimit, endY) { \ if( widthlimit != 8 || ((gs.imageEndX-gs.trxpos.dx)%widthlimit) ) { \ /*GS_LOG("Bad Transmission! %d %d, psm: %d\n", gs.trxpos.dx, gs.imageEndX, DSTPSM);*/ \ for(; i < endY; ++i) { \ for(; j < gs.imageEndX && nSize > 0; j += 1, nSize -= 1, pbuf += 3) { \ writePixel##psm##_0(pstart, j%2048, i%2048, *(u32*)(pbuf), gs.dstbuf.bw); \ } \ \ if( j >= gs.imageEndX ) { assert(gs.imageTransfer == -1 || j == gs.imageEndX); j = gs.trxpos.dx; } \ else { assert( gs.imageTransfer == -1 || nSize == 0 ); goto End; } \ } \ } \ else { \ assert( /*(nSize%widthlimit) == 0 &&*/ widthlimit == 8 ); \ for(; i < endY; ++i) { \ for(; j < gs.imageEndX && nSize > 0; j += widthlimit, nSize -= widthlimit, pbuf += 3*widthlimit) { \ if( nSize < widthlimit ) goto End; \ /* write as many pixel at one time as possible */ \ writePixel##psm##_0(pstart, j%2048, i%2048, *(u32*)(pbuf+0), gs.dstbuf.bw); \ writePixel##psm##_0(pstart, (j+1)%2048, i%2048, *(u32*)(pbuf+3), gs.dstbuf.bw); \ writePixel##psm##_0(pstart, (j+2)%2048, i%2048, *(u32*)(pbuf+6), gs.dstbuf.bw); \ writePixel##psm##_0(pstart, (j+3)%2048, i%2048, *(u32*)(pbuf+9), gs.dstbuf.bw); \ writePixel##psm##_0(pstart, (j+4)%2048, i%2048, *(u32*)(pbuf+12), gs.dstbuf.bw); \ writePixel##psm##_0(pstart, (j+5)%2048, i%2048, *(u32*)(pbuf+15), gs.dstbuf.bw); \ writePixel##psm##_0(pstart, (j+6)%2048, i%2048, *(u32*)(pbuf+18), gs.dstbuf.bw); \ writePixel##psm##_0(pstart, (j+7)%2048, i%2048, *(u32*)(pbuf+21), gs.dstbuf.bw); \ } \ \ if( j >= gs.imageEndX ) { assert(gs.imageTransfer == -1 || j == gs.imageEndX); j = gs.trxpos.dx; } \ else { \ if( nSize < 0 ) { \ /* extracted too much */ \ assert( (nSize%3)==0 && nSize > -24 ); \ j += nSize/3; \ nSize = 0; \ } \ assert( gs.imageTransfer == -1 || nSize == 0 ); \ goto End; \ } \ } \ } \ } \ // transmit until endX, don't check size since it has already been prevalidated #define TRANSMIT_HOSTLOCAL_X_24(psm, T, widthlimit, blockheight, startX) { \ for(int tempi = 0; tempi < blockheight; ++tempi) { \ for(j = startX; j < gs.imageEndX; j++, pbuf += 3) { \ writePixel##psm##_0(pstart, j%2048, (i+tempi)%2048, *(u32*)pbuf, gs.dstbuf.bw); \ } \ pbuf += 3*(pitch-fracX); \ } \ } \ // meant for 4bit transfers #define TRANSMIT_HOSTLOCAL_Y_4(psm, T, widthlimit, endY) { \ for(; i < endY; ++i) { \ for(; j < gs.imageEndX && nSize > 0; j += widthlimit, nSize -= widthlimit) { \ /* write as many pixel at one time as possible */ \ writePixel##psm##_0(pstart, j%2048, i%2048, *pbuf&0x0f, gs.dstbuf.bw); \ writePixel##psm##_0(pstart, (j+1)%2048, i%2048, *pbuf>>4, gs.dstbuf.bw); \ pbuf++; \ if( widthlimit > 2 ) { \ writePixel##psm##_0(pstart, (j+2)%2048, i%2048, *pbuf&0x0f, gs.dstbuf.bw); \ writePixel##psm##_0(pstart, (j+3)%2048, i%2048, *pbuf>>4, gs.dstbuf.bw); \ pbuf++; \ \ if( widthlimit > 4 ) { \ writePixel##psm##_0(pstart, (j+4)%2048, i%2048, *pbuf&0x0f, gs.dstbuf.bw); \ writePixel##psm##_0(pstart, (j+5)%2048, i%2048, *pbuf>>4, gs.dstbuf.bw); \ pbuf++; \ \ if( widthlimit > 6 ) { \ writePixel##psm##_0(pstart, (j+6)%2048, i%2048, *pbuf&0x0f, gs.dstbuf.bw); \ writePixel##psm##_0(pstart, (j+7)%2048, i%2048, *pbuf>>4, gs.dstbuf.bw); \ pbuf++; \ } \ } \ } \ } \ \ if( j >= gs.imageEndX ) { j = gs.trxpos.dx; } \ else { assert( gs.imageTransfer == -1 || (nSize/32) == 0 ); goto End; } \ } \ } \ // transmit until endX, don't check size since it has already been prevalidated #define TRANSMIT_HOSTLOCAL_X_4(psm, T, widthlimit, blockheight, startX) { \ for(int tempi = 0; tempi < blockheight; ++tempi) { \ for(j = startX; j < gs.imageEndX; j+=2, pbuf++) { \ writePixel##psm##_0(pstart, j%2048, (i+tempi)%2048, pbuf[0]&0x0f, gs.dstbuf.bw); \ writePixel##psm##_0(pstart, (j+1)%2048, (i+tempi)%2048, pbuf[0]>>4, gs.dstbuf.bw); \ } \ pbuf += (pitch-fracX)/2; \ } \ } \ // calculate pitch in source buffer #define TRANSMIT_PITCH_(pitch, T) (pitch*sizeof(T)) #define TRANSMIT_PITCH_24(pitch, T) (pitch*3) #define TRANSMIT_PITCH_4(pitch, T) (pitch/2) // special swizzle macros #define SwizzleBlock24(dst, src, pitch) { \ u8* pnewsrc = src; \ u32* pblock = tempblock; \ \ for(int by = 0; by < 7; ++by, pblock += 8, pnewsrc += pitch-24) { \ for(int bx = 0; bx < 8; ++bx, pnewsrc += 3) { \ pblock[bx] = *(u32*)pnewsrc; \ } \ } \ for(int bx = 0; bx < 7; ++bx, pnewsrc += 3) { \ /* might be 1 byte out of bounds of GS memory */ \ pblock[bx] = *(u32*)pnewsrc; \ } \ /* do 3 bytes for the last copy */ \ *((u8*)pblock+28) = pnewsrc[0]; \ *((u8*)pblock+29) = pnewsrc[1]; \ *((u8*)pblock+30) = pnewsrc[2]; \ SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0x00ffffff); \ } \ #define SwizzleBlock24u SwizzleBlock24 #define SwizzleBlock8H(dst, src, pitch) { \ u8* pnewsrc = src; \ u32* pblock = tempblock; \ \ for(int by = 0; by < 8; ++by, pblock += 8, pnewsrc += pitch) { \ u32 u = *(u32*)pnewsrc; \ pblock[0] = u<<24; \ pblock[1] = u<<16; \ pblock[2] = u<<8; \ pblock[3] = u; \ u = *(u32*)(pnewsrc+4); \ pblock[4] = u<<24; \ pblock[5] = u<<16; \ pblock[6] = u<<8; \ pblock[7] = u; \ } \ SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0xff000000); \ } \ #define SwizzleBlock8Hu SwizzleBlock8H #define SwizzleBlock4HH(dst, src, pitch) { \ u8* pnewsrc = src; \ u32* pblock = tempblock; \ \ for(int by = 0; by < 8; ++by, pblock += 8, pnewsrc += pitch) { \ u32 u = *(u32*)pnewsrc; \ pblock[0] = u<<28; \ pblock[1] = u<<24; \ pblock[2] = u<<20; \ pblock[3] = u<<16; \ pblock[4] = u<<12; \ pblock[5] = u<<8; \ pblock[6] = u<<4; \ pblock[7] = u; \ } \ SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0xf0000000); \ } \ #define SwizzleBlock4HHu SwizzleBlock4HH #define SwizzleBlock4HL(dst, src, pitch) { \ u8* pnewsrc = src; \ u32* pblock = tempblock; \ \ for(int by = 0; by < 8; ++by, pblock += 8, pnewsrc += pitch) { \ u32 u = *(u32*)pnewsrc; \ pblock[0] = u<<24; \ pblock[1] = u<<20; \ pblock[2] = u<<16; \ pblock[3] = u<<12; \ pblock[4] = u<<8; \ pblock[5] = u<<4; \ pblock[6] = u; \ pblock[7] = u>>4; \ } \ SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0x0f000000); \ } \ #define SwizzleBlock4HLu SwizzleBlock4HL // ------------------------ // | Y | // ------------------------ // | block | | // | aligned area | X | // | | | // ------------------------ // | Y | // ------------------------ #define DEFINE_TRANSFERLOCAL(psm, T, widthlimit, blockbits, blockwidth, blockheight, TransSfx, SwizzleBlock) \ int TransferHostLocal##psm(const void* pbyMem, u32 nQWordSize) \ { \ START_HOSTLOCAL(); \ \ const T* pbuf = (const T*)pbyMem; \ int nLeftOver = (nQWordSize*4*2)%(TRANSMIT_PITCH##TransSfx(2, T)); \ int nSize = nQWordSize*4*2/TRANSMIT_PITCH##TransSfx(2, T); \ nSize = min(nSize, gs.imageWnew * gs.imageHnew); \ \ int pitch, area, fracX; \ int endY = ROUND_UPPOW2(i, blockheight); \ int alignedY = ROUND_DOWNPOW2(gs.imageEndY, blockheight); \ int alignedX = ROUND_DOWNPOW2(gs.imageEndX, blockwidth); \ bool bAligned, bCanAlign = MOD_POW2(gs.trxpos.dx, blockwidth) == 0 && (j == gs.trxpos.dx) && (alignedY > endY) && alignedX > gs.trxpos.dx; \ \ if( (gs.imageEndX-gs.trxpos.dx)%widthlimit ) { \ /* hack */ \ int testwidth = (int)nSize - (gs.imageEndY-i)*(gs.imageEndX-gs.trxpos.dx)+(j-gs.trxpos.dx); \ if ((testwidth <= widthlimit) && (testwidth >= -widthlimit)) { \ /* Don't transfer */ \ /*DEBUG_LOG("bad texture %s: %d %d %d\n", #psm, gs.trxpos.dx, gs.imageEndX, nQWordSize);*/ \ gs.imageTransfer = -1; \ } \ bCanAlign = false; \ } \ \ /* first align on block boundary */ \ if( MOD_POW2(i, blockheight) || !bCanAlign ) { \ \ if( !bCanAlign ) \ endY = gs.imageEndY; /* transfer the whole image */ \ else \ assert( endY < gs.imageEndY); /* part of alignment condition */ \ \ if( ((gs.imageEndX-gs.trxpos.dx)%widthlimit) || ((gs.imageEndX-j)%widthlimit) ) { \ /* transmit with a width of 1 */ \ TRANSMIT_HOSTLOCAL_Y##TransSfx(psm, T, (1+(DSTPSM == 0x14)), endY); \ } \ else { \ TRANSMIT_HOSTLOCAL_Y##TransSfx(psm, T, widthlimit, endY); \ } \ \ if( nSize == 0 || i == gs.imageEndY ) \ goto End; \ } \ \ assert( MOD_POW2(i, blockheight) == 0 && j == gs.trxpos.dx); \ \ /* can align! */ \ pitch = gs.imageEndX-gs.trxpos.dx; \ area = pitch*blockheight; \ fracX = gs.imageEndX-alignedX; \ \ /* on top of checking whether pbuf is alinged, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */ \ bAligned = !((uptr)pbuf & 0xf) && (TRANSMIT_PITCH##TransSfx(pitch, T)&0xf) == 0; \ \ /* transfer aligning to blocks */ \ for(; i < alignedY && nSize >= area; i += blockheight, nSize -= area) { \ \ if( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)) ) { \ for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TRANSMIT_PITCH##TransSfx(blockwidth, T)/sizeof(T)) { \ SwizzleBlock(pstart + getPixelAddress##psm##_0(tempj, i, gs.dstbuf.bw)*blockbits/8, \ (u8*)pbuf, TRANSMIT_PITCH##TransSfx(pitch, T)); \ } \ } \ else { \ for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TRANSMIT_PITCH##TransSfx(blockwidth, T)/sizeof(T)) { \ SwizzleBlock##u(pstart + getPixelAddress##psm##_0(tempj, i, gs.dstbuf.bw)*blockbits/8, \ (u8*)pbuf, TRANSMIT_PITCH##TransSfx(pitch, T)); \ } \ } \ \ /* transfer the rest */ \ if( alignedX < gs.imageEndX ) { \ TRANSMIT_HOSTLOCAL_X##TransSfx(psm, T, widthlimit, blockheight, alignedX); \ pbuf -= TRANSMIT_PITCH##TransSfx((alignedX-gs.trxpos.dx), T)/sizeof(T); \ } \ else pbuf += (blockheight-1)*TRANSMIT_PITCH##TransSfx(pitch, T)/sizeof(T); \ j = gs.trxpos.dx; \ } \ \ if( TRANSMIT_PITCH##TransSfx(nSize, T)/4 > 0 ) { \ TRANSMIT_HOSTLOCAL_Y##TransSfx(psm, T, widthlimit, gs.imageEndY); \ /* sometimes wrong sizes are sent (tekken tag) */ \ assert( gs.imageTransfer == -1 || TRANSMIT_PITCH##TransSfx(nSize,T)/4 <= 2 ); \ } \ \ END_HOSTLOCAL(); \ return (nSize * TRANSMIT_PITCH##TransSfx(2, T) + nLeftOver)/2; \ } \ DEFINE_TRANSFERLOCAL(32, u32, 2, 32, 8, 8, _, SwizzleBlock32); DEFINE_TRANSFERLOCAL(32Z, u32, 2, 32, 8, 8, _, SwizzleBlock32); DEFINE_TRANSFERLOCAL(24, u8, 8, 32, 8, 8, _24, SwizzleBlock24); DEFINE_TRANSFERLOCAL(24Z, u8, 8, 32, 8, 8, _24, SwizzleBlock24); DEFINE_TRANSFERLOCAL(16, u16, 4, 16, 16, 8, _, SwizzleBlock16); DEFINE_TRANSFERLOCAL(16S, u16, 4, 16, 16, 8, _, SwizzleBlock16); DEFINE_TRANSFERLOCAL(16Z, u16, 4, 16, 16, 8, _, SwizzleBlock16); DEFINE_TRANSFERLOCAL(16SZ, u16, 4, 16, 16, 8, _, SwizzleBlock16); DEFINE_TRANSFERLOCAL(8, u8, 4, 8, 16, 16, _, SwizzleBlock8); DEFINE_TRANSFERLOCAL(4, u8, 8, 4, 32, 16, _4, SwizzleBlock4); DEFINE_TRANSFERLOCAL(8H, u8, 4, 32, 8, 8, _, SwizzleBlock8H); DEFINE_TRANSFERLOCAL(4HL, u8, 8, 32, 8, 8, _4, SwizzleBlock4HL); DEFINE_TRANSFERLOCAL(4HH, u8, 8, 32, 8, 8, _4, SwizzleBlock4HH); //#define T u8 //#define widthlimit 8 //#define blockbits 4 //#define blockwidth 32 //#define blockheight 16 // //void TransferHostLocal4(const void* pbyMem, u32 nQWordSize) //{ // START_HOSTLOCAL(); // // const T* pbuf = (const T*)pbyMem; // u32 nSize = nQWordSize*16*2/TRANSMIT_PITCH_4(2, T); // nSize = min(nSize, gs.imageWnew * gs.imageHnew); // // int endY = ROUND_UPPOW2(i, blockheight); // int alignedY = ROUND_DOWNPOW2(gs.imageEndY, blockheight); // int alignedX = ROUND_DOWNPOW2(gs.imageEndX, blockwidth); // bool bCanAlign = MOD_POW2(gs.trxpos.dx, blockwidth) == 0 && (j == gs.trxpos.dx) && (alignedY > endY) && alignedX > gs.trxpos.dx; // // if( (gs.imageEndX-gs.trxpos.dx)%widthlimit ) { // /* hack */ // if( abs((int)nSize - (gs.imageEndY-i)*(gs.imageEndX-gs.trxpos.dx)+(j-gs.trxpos.dx)) <= widthlimit ) { // /* don't transfer */ // /*DEBUG_LOG("bad texture %s: %d %d %d\n", #psm, gs.trxpos.dx, gs.imageEndX, nQWordSize);*/ // gs.imageTransfer = -1; // } // bCanAlign = false; // } // // /* first align on block boundary */ // if( MOD_POW2(i, blockheight) || !bCanAlign ) { // // if( !bCanAlign ) // endY = gs.imageEndY; /* transfer the whole image */ // else // assert( endY < gs.imageEndY); /* part of alignment condition */ // // if( (DSTPSM == 0x13 || DSTPSM == 0x14) && ((gs.imageEndX-gs.trxpos.dx)%widthlimit) ) { // /* transmit with a width of 1 */ // TRANSMIT_HOSTLOCAL_Y_4(4, T, (1+(DSTPSM == 0x14)), endY); // } // else { // TRANSMIT_HOSTLOCAL_Y_4(4, T, widthlimit, endY); // } // // if( nSize == 0 || i == gs.imageEndY ) // goto End; // } // // assert( MOD_POW2(i, blockheight) == 0 && j == gs.trxpos.dx); // // /* can align! */ // int pitch = gs.imageEndX-gs.trxpos.dx; // u32 area = pitch*blockheight; // int fracX = gs.imageEndX-alignedX; // // /* on top of checking whether pbuf is alinged, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */ // bool bAligned = !((u32)pbuf & 0xf) && (TRANSMIT_PITCH_4(pitch, T)&0xf) == 0; // // /* transfer aligning to blocks */ // for(; i < alignedY && nSize >= area; i += blockheight, nSize -= area) { // // if( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)) ) { // for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TRANSMIT_PITCH_4(blockwidth, T)/sizeof(T)) { // SwizzleBlock4(pstart + getPixelAddress4_0(tempj, i, gs.dstbuf.bw)*blockbits/8, // (u8*)pbuf, TRANSMIT_PITCH_4(pitch, T)); // } // } // else { // for(int tempj = gs.trxpos.dx; tempj < alignedX; tempj += blockwidth, pbuf += TRANSMIT_PITCH_4(blockwidth, T)/sizeof(T)) { // SwizzleBlock4u(pstart + getPixelAddress4_0(tempj, i, gs.dstbuf.bw)*blockbits/8, // (u8*)pbuf, TRANSMIT_PITCH_4(pitch, T)); // } // } // // /* transfer the rest */ // if( alignedX < gs.imageEndX ) { // TRANSMIT_HOSTLOCAL_X_4(4, T, widthlimit, blockheight, alignedX); // pbuf -= TRANSMIT_PITCH_4((alignedX-gs.trxpos.dx), T)/sizeof(T); // } // else pbuf += (blockheight-1)*TRANSMIT_PITCH_4(pitch, T)/sizeof(T); // j = 0; // } // // if( TRANSMIT_PITCH_4(nSize, T)/4 > 0 ) { // TRANSMIT_HOSTLOCAL_Y_4(4, T, widthlimit, gs.imageEndY); // /* sometimes wrong sizes are sent (tekken tag) */ // assert( gs.imageTransfer == -1 || TRANSMIT_PITCH_4(nSize,T)/4 <= 2 ); // } // // END_HOSTLOCAL(); //} void TransferLocalHost32(void* pbyMem, u32 nQWordSize) { } void TransferLocalHost24(void* pbyMem, u32 nQWordSize) { } void TransferLocalHost16(void* pbyMem, u32 nQWordSize) { } void TransferLocalHost16S(void* pbyMem, u32 nQWordSize) { } void TransferLocalHost8(void* pbyMem, u32 nQWordSize) { } void TransferLocalHost4(void* pbyMem, u32 nQWordSize) { } void TransferLocalHost8H(void* pbyMem, u32 nQWordSize) { } void TransferLocalHost4HL(void* pbyMem, u32 nQWordSize) { } void TransferLocalHost4HH(void* pbyMem, u32 nQWordSize) { } void TransferLocalHost32Z(void* pbyMem, u32 nQWordSize) { } void TransferLocalHost24Z(void* pbyMem, u32 nQWordSize) { } void TransferLocalHost16Z(void* pbyMem, u32 nQWordSize) { } void TransferLocalHost16SZ(void* pbyMem, u32 nQWordSize) { } #define FILL_BLOCK(bw, bh, ox, oy, mult, psm, psmcol) { \ b.vTexDims = D3DXVECTOR2(BLOCK_TEXWIDTH/(float)(bw), BLOCK_TEXHEIGHT/(float)bh); \ b.vTexBlock = D3DXVECTOR4((float)bw/BLOCK_TEXWIDTH, (float)bh/BLOCK_TEXHEIGHT, ((float)ox+0.2f)/BLOCK_TEXWIDTH, ((float)oy+0.05f)/BLOCK_TEXHEIGHT); \ b.width = bw; \ b.height = bh; \ b.colwidth = bh / 4; \ b.colheight = bw / 8; \ b.bpp = 32/mult; \ \ b.pageTable = &g_pageTable##psm[0][0]; \ b.blockTable = &g_blockTable##psm[0][0]; \ b.columnTable = &g_columnTable##psmcol[0][0]; \ assert( sizeof(g_pageTable##psm) == bw*bh*sizeof(g_pageTable##psm[0][0]) ); \ psrcf = (float*)pBlockTexture->pBits + ox + oy * pBlockTexture->Pitch/sizeof(float); \ psrcw = (WORD*)pBlockTexture->pBits + ox + oy * pBlockTexture->Pitch/sizeof(WORD); \ for(i = 0; i < bh; ++i) { \ for(j = 0; j < bw; ++j) { \ /* fill the table */ \ u32 u = g_blockTable##psm[(i / b.colheight)][(j / b.colwidth)] * 64 * mult + g_columnTable##psmcol[i%b.colheight][j%b.colwidth]; \ b.pageTable[i*bw+j] = u; \ if( format == D3DFMT_R32F ) { \ psrcf[i*pBlockTexture->Pitch/sizeof(float)+j] = (float)(u) / (float)(GPU_TEXWIDTH*mult); \ } \ else { \ psrcw[i*pBlockTexture->Pitch/sizeof(WORD)+j] = u; \ } \ } \ } \ \ if( pBilinearTexture != NULL ) { \ assert( format == D3DFMT_R32F ); \ psrcv = (DXVEC4*)pBilinearTexture->pBits + ox + oy * pBilinearTexture->Pitch/sizeof(DXVEC4); \ for(i = 0; i < bh; ++i) { \ for(j = 0; j < bw; ++j) { \ DXVEC4* pv = &psrcv[i*pBilinearTexture->Pitch/sizeof(DXVEC4)+j]; \ pv->x = psrcf[i*pBlockTexture->Pitch/sizeof(float)+j]; \ pv->y = psrcf[i*pBlockTexture->Pitch/sizeof(float)+((j+1)%bw)]; \ pv->z = psrcf[((i+1)%bh)*pBlockTexture->Pitch/sizeof(float)+j]; \ pv->w = psrcf[((i+1)%bh)*pBlockTexture->Pitch/sizeof(float)+((j+1)%bw)]; \ } \ } \ } \ b.getPixelAddress = getPixelAddress##psm; \ b.getPixelAddress_0 = getPixelAddress##psm##_0; \ b.writePixel = writePixel##psm; \ b.writePixel_0 = writePixel##psm##_0; \ b.readPixel = readPixel##psm; \ b.readPixel_0 = readPixel##psm##_0; \ b.TransferHostLocal = TransferHostLocal##psm; \ b.TransferLocalHost = TransferLocalHost##psm; \ } \ void BLOCK::FillBlocks(const D3DLOCKED_RECT* pBlockTexture, const D3DLOCKED_RECT* pBilinearTexture, D3DFORMAT format) { assert( pBlockTexture != NULL ); assert( format == D3DFMT_R32F || format == D3DFMT_G16R16 ); int i, j; BLOCK b; float* psrcf = NULL; WORD* psrcw = NULL; DXVEC4* psrcv = NULL; memset(m_Blocks, 0, sizeof(m_Blocks)); // 32 FILL_BLOCK(64, 32, 0, 0, 1, 32, 32); m_Blocks[PSMCT32] = b; // 24 (same as 32 except write/readPixel are different) m_Blocks[PSMCT24] = b; m_Blocks[PSMCT24].writePixel = writePixel24; m_Blocks[PSMCT24].writePixel_0 = writePixel24_0; m_Blocks[PSMCT24].readPixel = readPixel24; m_Blocks[PSMCT24].readPixel_0 = readPixel24_0; m_Blocks[PSMCT24].TransferHostLocal = TransferHostLocal24; m_Blocks[PSMCT24].TransferLocalHost = TransferLocalHost24; // 8H (same as 32 except write/readPixel are different) m_Blocks[PSMT8H] = b; m_Blocks[PSMT8H].writePixel = writePixel8H; m_Blocks[PSMT8H].writePixel_0 = writePixel8H_0; m_Blocks[PSMT8H].readPixel = readPixel8H; m_Blocks[PSMT8H].readPixel_0 = readPixel8H_0; m_Blocks[PSMT8H].TransferHostLocal = TransferHostLocal8H; m_Blocks[PSMT8H].TransferLocalHost = TransferLocalHost8H; m_Blocks[PSMT4HL] = b; m_Blocks[PSMT4HL].writePixel = writePixel4HL; m_Blocks[PSMT4HL].writePixel_0 = writePixel4HL_0; m_Blocks[PSMT4HL].readPixel = readPixel4HL; m_Blocks[PSMT4HL].readPixel_0 = readPixel4HL_0; m_Blocks[PSMT4HL].TransferHostLocal = TransferHostLocal4HL; m_Blocks[PSMT4HL].TransferLocalHost = TransferLocalHost4HL; m_Blocks[PSMT4HH] = b; m_Blocks[PSMT4HH].writePixel = writePixel4HH; m_Blocks[PSMT4HH].writePixel_0 = writePixel4HH_0; m_Blocks[PSMT4HH].readPixel = readPixel4HH; m_Blocks[PSMT4HH].readPixel_0 = readPixel4HH_0; m_Blocks[PSMT4HH].TransferHostLocal = TransferHostLocal4HH; m_Blocks[PSMT4HH].TransferLocalHost = TransferLocalHost4HH; // 32z FILL_BLOCK(64, 32, 64, 0, 1, 32Z, 32); m_Blocks[PSMT32Z] = b; // 24Z (same as 32Z except write/readPixel are different) m_Blocks[PSMT24Z] = b; m_Blocks[PSMT24Z].writePixel = writePixel24Z; m_Blocks[PSMT24Z].writePixel_0 = writePixel24Z_0; m_Blocks[PSMT24Z].readPixel = readPixel24Z; m_Blocks[PSMT24Z].readPixel_0 = readPixel24Z_0; m_Blocks[PSMT24Z].TransferHostLocal = TransferHostLocal24Z; m_Blocks[PSMT24Z].TransferLocalHost = TransferLocalHost24Z; // 16 FILL_BLOCK(64, 64, 0, 32, 2, 16, 16); m_Blocks[PSMCT16] = b; // 16s FILL_BLOCK(64, 64, 64, 32, 2, 16S, 16); m_Blocks[PSMCT16S] = b; // 16z FILL_BLOCK(64, 64, 0, 96, 2, 16Z, 16); m_Blocks[PSMT16Z] = b; // 16sz FILL_BLOCK(64, 64, 64, 96, 2, 16SZ, 16); m_Blocks[PSMT16SZ] = b; // 8 FILL_BLOCK(128, 64, 0, 160, 4, 8, 8); m_Blocks[PSMT8] = b; // 4 FILL_BLOCK(128, 128, 0, 224, 8, 4, 4); m_Blocks[PSMT4] = b; }