diff --git a/plugins/zzogl-pg/opengl/Mem.cpp b/plugins/zzogl-pg/opengl/Mem.cpp index 974565fc30..f5541df977 100644 --- a/plugins/zzogl-pg/opengl/Mem.cpp +++ b/plugins/zzogl-pg/opengl/Mem.cpp @@ -184,7 +184,7 @@ static __forceinline int RealTransfer(u32 psm, const void* pbyMem, u32 nQWordSiz tempY = gs.imageY; tempX = gs.imageX; Point alignedPt; - + nSize = (nQWordSize * 4 * 2) / tp2; nSize = min(nSize, gs.imageWnew * gs.imageHnew); @@ -241,237 +241,138 @@ void TransferLocalHost24Z(void* pbyMem, u32 nQWordSize) {FUNCLOG} void TransferLocalHost16Z(void* pbyMem, u32 nQWordSize) {FUNCLOG} void TransferLocalHost16SZ(void* pbyMem, u32 nQWordSize) {FUNCLOG} -#define FILL_BLOCK(psm, psmcol) \ -{ \ - b.pageTable = &g_pageTable##psm[0][0]; \ - b.blockTable = &g_blockTable##psm[0][0]; \ - b.columnTable = &g_columnTable##psmcol[0][0]; \ - \ - assert( sizeof(g_pageTable##psm) == b.width * b.height * sizeof(g_pageTable##psm[0][0]) ); \ - \ - psrcf = (float*)&vBlockData[0] + b.ox + b.oy * BLOCK_TEXWIDTH; \ - psrcw = (u16*)&vBlockData[0] + b.ox + b.oy * BLOCK_TEXWIDTH; \ - \ - for(i = 0; i < b.height; ++i) \ - { \ - u32 i_width = i*BLOCK_TEXWIDTH; \ - for(j = 0; j < b.width; ++j) \ - { \ - /* fill the table */ \ - u32 u = g_blockTable##psm[(i / b.colheight)][(j / b.colwidth)] * 64 * b.mult + g_columnTable##psmcol[i%b.colheight][j%b.colwidth]; \ - b.pageTable[i * b.width + j] = u; \ - psrcf[i_width + j] = (float)(u) / (float)(GPU_TEXWIDTH * b.mult); \ - } \ - } \ - \ - psrcv = (Vector*)&vBilinearData[0] + b.ox + b.oy * BLOCK_TEXWIDTH; \ - \ - for(i = 0; i < b.height; ++i) \ - { \ - u32 i_width = i*BLOCK_TEXWIDTH; \ - u32 i_width2 = ((i+1)%b.height)*BLOCK_TEXWIDTH; \ - for(j = 0; j < b.width; ++j) \ - { \ - u32 temp = ((j + 1) % b.width); \ - Vector* pv = &psrcv[i_width + j]; \ - pv->x = psrcf[i_width + j]; \ - pv->y = psrcf[i_width + temp]; \ - pv->z = psrcf[i_width2 + j]; \ - pv->w = psrcf[i_width2 + temp]; \ - } \ - } \ -} - -#define FILL_BLOCK_NF(psm, psmcol) \ -{ \ - b.pageTable = &g_pageTable##psm[0][0]; \ - b.blockTable = &g_blockTable##psm[0][0]; \ - b.columnTable = &g_columnTable##psmcol[0][0]; \ - \ - assert( sizeof(g_pageTable##psm) == b.width * b.height * sizeof(g_pageTable##psm[0][0]) ); \ - \ - psrcf = (float*)&vBlockData[0] + b.ox + b.oy * BLOCK_TEXWIDTH; \ - psrcw = (u16*)&vBlockData[0] + b.ox + b.oy * BLOCK_TEXWIDTH; \ - \ - for(i = 0; i < b.height; ++i) \ - { \ - u32 i_width = i*BLOCK_TEXWIDTH; \ - for(j = 0; j < b.width; ++j) \ - { \ - /* fill the table */ \ - u32 u = g_blockTable##psm[(i / b.colheight)][(j / b.colwidth)] * 64 * b.mult + g_columnTable##psmcol[i%b.colheight][j%b.colwidth]; \ - b.pageTable[i * b.width + j] = u; \ - psrcw[i_width + j] = u; \ - } \ - } \ -} - -void FillBlocksNF(vector& vBlockData, vector& vBilinearData) +void fill_block(BLOCK b, vector& vBlockData, vector& vBilinearData, int floatfmt) { - FUNCLOG - vBlockData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * 2); + assert( sizeof(b.pageTable) == b.width * b.height * sizeof(b.pageTable[0][0]) ); - int i, j; - BLOCK b; - float* psrcf = NULL; - u16* psrcw = NULL; + float* psrcf = (float*)&vBlockData[0] + b.ox + b.oy * BLOCK_TEXWIDTH; + u16* psrcw = NULL; + if (!floatfmt) + psrcw = (u16*)&vBlockData[0] + b.ox + b.oy * BLOCK_TEXWIDTH; - memset(m_Blocks, 0, sizeof(m_Blocks)); + for(int i = 0; i < b.height; ++i) + { + u32 i_width = i*BLOCK_TEXWIDTH; + for(int j = 0; j < b.width; ++j) + { + /* fill the table */ + u32 bt = b.blockTable[(i / b.colheight)*(b.width/b.colwidth) + (j / b.colwidth)]; + u32 ct = b.columnTable[(i%b.colheight)*b.colwidth + (j%b.colwidth)]; + u32 u = bt * 64 * b.mult + ct; + b.pageTable[i * b.width + j] = u; + if (floatfmt) + psrcf[i_width + j] = (float)(u) / (float)(GPU_TEXWIDTH * b.mult); + else + psrcw[i_width + j] = u; - // 32 - b.SetDim(64, 32, 0, 0, 1); - FILL_BLOCK_NF(32, 32); - m_Blocks[PSMCT32] = b; - m_Blocks[PSMCT32].SetFun(PSMCT32); + } + } - // 24 (same as 32 except write/readPixel are different) - m_Blocks[PSMCT24] = b; - m_Blocks[PSMCT24].SetFun(PSMCT24); + if (floatfmt) { + Vector* psrcv = (Vector*)&vBilinearData[0] + b.ox + b.oy * BLOCK_TEXWIDTH; - // 8H (same as 32 except write/readPixel are different) - m_Blocks[PSMT8H] = b; - m_Blocks[PSMT8H].SetFun(PSMT8H); - - m_Blocks[PSMT4HL] = b; - m_Blocks[PSMT4HL].SetFun(PSMT4HL); - - m_Blocks[PSMT4HH] = b; - m_Blocks[PSMT4HH].SetFun(PSMT4HH); - - // 32z - b.SetDim(64, 32, 64, 0, 1); - FILL_BLOCK_NF(32Z, 32); - m_Blocks[PSMT32Z] = b; - m_Blocks[PSMT32Z].SetFun(PSMT32Z); - - // 24Z (same as 32Z except write/readPixel are different) - m_Blocks[PSMT24Z] = b; - m_Blocks[PSMT24Z].SetFun(PSMT24Z); - - // 16 - b.SetDim(64, 64, 0, 32, 2); - FILL_BLOCK_NF(16, 16); - m_Blocks[PSMCT16] = b; - m_Blocks[PSMCT16].SetFun(PSMCT16); - - // 16s - b.SetDim(64, 64, 64, 32, 2); - FILL_BLOCK_NF(16S, 16); - m_Blocks[PSMCT16S] = b; - m_Blocks[PSMCT16S].SetFun(PSMCT16S); - - // 16z - b.SetDim(64, 64, 0, 96, 2); - FILL_BLOCK_NF(16Z, 16); - m_Blocks[PSMT16Z] = b; - m_Blocks[PSMT16Z].SetFun(PSMT16Z); - - // 16sz - b.SetDim(64, 64, 64, 96, 2); - FILL_BLOCK_NF(16SZ, 16); - m_Blocks[PSMT16SZ] = b; - m_Blocks[PSMT16SZ].SetFun(PSMT16SZ); - - // 8 - b.SetDim(128, 64, 0, 160, 4); - FILL_BLOCK_NF(8, 8); - m_Blocks[PSMT8] = b; - m_Blocks[PSMT8].SetFun(PSMT8); - - // 4 - b.SetDim(128, 128, 0, 224, 8); - FILL_BLOCK_NF(4, 4); - m_Blocks[PSMT4] = b; - m_Blocks[PSMT4].SetFun(PSMT4); -} - - -void FillBlocksF(vector& vBlockData, vector& vBilinearData) -{ - FUNCLOG - vBlockData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * 4); - vBilinearData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * sizeof(Vector)); - - int i, j; - BLOCK b; - float* psrcf = NULL; - u16* psrcw = NULL; - Vector* psrcv = NULL; - - memset(m_Blocks, 0, sizeof(m_Blocks)); - - // 32 - b.SetDim(64, 32, 0, 0, 1); - FILL_BLOCK(32, 32); - m_Blocks[PSMCT32] = b; - m_Blocks[PSMCT32].SetFun(PSMCT32); - - // 24 (same as 32 except write/readPixel are different) - m_Blocks[PSMCT24] = b; - m_Blocks[PSMCT24].SetFun(PSMCT24); - - // 8H (same as 32 except write/readPixel are different) - m_Blocks[PSMT8H] = b; - m_Blocks[PSMT8H].SetFun(PSMT8H); - - m_Blocks[PSMT4HL] = b; - m_Blocks[PSMT4HL].SetFun(PSMT4HL); - - m_Blocks[PSMT4HH] = b; - m_Blocks[PSMT4HH].SetFun(PSMT4HH); - - // 32z - b.SetDim(64, 32, 64, 0, 1); - FILL_BLOCK(32Z, 32); - m_Blocks[PSMT32Z] = b; - m_Blocks[PSMT32Z].SetFun(PSMT32Z); - - // 24Z (same as 32Z except write/readPixel are different) - m_Blocks[PSMT24Z] = b; - m_Blocks[PSMT24Z].SetFun(PSMT24Z); - - // 16 - b.SetDim(64, 64, 0, 32, 2); - FILL_BLOCK(16, 16); - m_Blocks[PSMCT16] = b; - m_Blocks[PSMCT16].SetFun(PSMCT16); - - // 16s - b.SetDim(64, 64, 64, 32, 2); - FILL_BLOCK(16S, 16); - m_Blocks[PSMCT16S] = b; - m_Blocks[PSMCT16S].SetFun(PSMCT16S); - - // 16z - b.SetDim(64, 64, 0, 96, 2); - FILL_BLOCK(16Z, 16); - m_Blocks[PSMT16Z] = b; - m_Blocks[PSMT16Z].SetFun(PSMT16Z); - - // 16sz - b.SetDim(64, 64, 64, 96, 2); - FILL_BLOCK(16SZ, 16); - m_Blocks[PSMT16SZ] = b; - m_Blocks[PSMT16SZ].SetFun(PSMT16SZ); - - // 8 - b.SetDim(128, 64, 0, 160, 4); - FILL_BLOCK(8, 8); - m_Blocks[PSMT8] = b; - m_Blocks[PSMT8].SetFun(PSMT8); - - // 4 - b.SetDim(128, 128, 0, 224, 8); - FILL_BLOCK(4, 4); - m_Blocks[PSMT4] = b; - m_Blocks[PSMT4].SetFun(PSMT4); + for(int i = 0; i < b.height; ++i) + { + u32 i_width = i*BLOCK_TEXWIDTH; + u32 i_width2 = ((i+1)%b.height)*BLOCK_TEXWIDTH; + for(int j = 0; j < b.width; ++j) + { + u32 temp = ((j + 1) % b.width); + Vector* pv = &psrcv[i_width + j]; + pv->x = psrcf[i_width + j]; + pv->y = psrcf[i_width + temp]; + pv->z = psrcf[i_width2 + j]; + pv->w = psrcf[i_width2 + temp]; + } + } + } } void BLOCK::FillBlocks(vector& vBlockData, vector& vBilinearData, int floatfmt) { FUNCLOG - if (floatfmt) - FillBlocksF(vBlockData, vBilinearData); - else - FillBlocksNF(vBlockData, vBilinearData); + if (floatfmt) { + vBlockData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * 4); + vBilinearData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * sizeof(Vector)); + } else { + vBlockData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * 2); + } + + BLOCK b; + + memset(m_Blocks, 0, sizeof(m_Blocks)); + + // 32 + b.SetDim(64, 32, 0, 0, 1); + b.SetTable(PSMCT32); + fill_block(b, vBlockData, vBilinearData, floatfmt); + m_Blocks[PSMCT32] = b; + m_Blocks[PSMCT32].SetFun(PSMCT32); + + // 24 (same as 32 except write/readPixel are different) + m_Blocks[PSMCT24] = b; + m_Blocks[PSMCT24].SetFun(PSMCT24); + + // 8H (same as 32 except write/readPixel are different) + m_Blocks[PSMT8H] = b; + m_Blocks[PSMT8H].SetFun(PSMT8H); + + m_Blocks[PSMT4HL] = b; + m_Blocks[PSMT4HL].SetFun(PSMT4HL); + + m_Blocks[PSMT4HH] = b; + m_Blocks[PSMT4HH].SetFun(PSMT4HH); + + // 32z + b.SetDim(64, 32, 64, 0, 1); + b.SetTable(PSMT32Z); + fill_block(b, vBlockData, vBilinearData, floatfmt); + m_Blocks[PSMT32Z] = b; + m_Blocks[PSMT32Z].SetFun(PSMT32Z); + + // 24Z (same as 32Z except write/readPixel are different) + m_Blocks[PSMT24Z] = b; + m_Blocks[PSMT24Z].SetFun(PSMT24Z); + + // 16 + b.SetDim(64, 64, 0, 32, 2); + b.SetTable(PSMCT16); + fill_block(b, vBlockData, vBilinearData, floatfmt); + m_Blocks[PSMCT16] = b; + m_Blocks[PSMCT16].SetFun(PSMCT16); + + // 16s + b.SetDim(64, 64, 64, 32, 2); + b.SetTable(PSMCT16S); + fill_block(b, vBlockData, vBilinearData, floatfmt); + m_Blocks[PSMCT16S] = b; + m_Blocks[PSMCT16S].SetFun(PSMCT16S); + + // 16z + b.SetDim(64, 64, 0, 96, 2); + b.SetTable(PSMT16Z); + fill_block(b, vBlockData, vBilinearData, floatfmt); + m_Blocks[PSMT16Z] = b; + m_Blocks[PSMT16Z].SetFun(PSMT16Z); + + // 16sz + b.SetDim(64, 64, 64, 96, 2); + b.SetTable(PSMT16SZ); + fill_block(b, vBlockData, vBilinearData, floatfmt); + m_Blocks[PSMT16SZ] = b; + m_Blocks[PSMT16SZ].SetFun(PSMT16SZ); + + // 8 + b.SetDim(128, 64, 0, 160, 4); + b.SetTable(PSMT8); + fill_block(b, vBlockData, vBilinearData, floatfmt); + m_Blocks[PSMT8] = b; + m_Blocks[PSMT8].SetFun(PSMT8); + + // 4 + b.SetDim(128, 128, 0, 224, 8); + b.SetTable(PSMT4); + fill_block(b, vBlockData, vBilinearData, floatfmt); + m_Blocks[PSMT4] = b; + m_Blocks[PSMT4].SetFun(PSMT4); } diff --git a/plugins/zzogl-pg/opengl/Mem.h b/plugins/zzogl-pg/opengl/Mem.h index bae24fe7fc..bd96113ad2 100644 --- a/plugins/zzogl-pg/opengl/Mem.h +++ b/plugins/zzogl-pg/opengl/Mem.h @@ -92,6 +92,29 @@ struct TransferFuncts extern TransferData tData[64]; // rest not visible externally +extern u32 g_blockTable32[4][8]; +extern u32 g_blockTable32Z[4][8]; +extern u32 g_blockTable16[8][4]; +extern u32 g_blockTable16S[8][4]; +extern u32 g_blockTable16Z[8][4]; +extern u32 g_blockTable16SZ[8][4]; +extern u32 g_blockTable8[4][8]; +extern u32 g_blockTable4[8][4]; + +extern u32 g_columnTable32[8][8]; +extern u32 g_columnTable16[8][16]; +extern u32 g_columnTable8[16][16]; +extern u32 g_columnTable4[16][32]; + +extern u32 g_pageTable32[32][64]; +extern u32 g_pageTable32Z[32][64]; +extern u32 g_pageTable16[64][64]; +extern u32 g_pageTable16S[64][64]; +extern u32 g_pageTable16Z[64][64]; +extern u32 g_pageTable16SZ[64][64]; +extern u32 g_pageTable8[64][128]; +extern u32 g_pageTable4[128][128]; + struct BLOCK { BLOCK() { memset(this, 0, sizeof(BLOCK)); } @@ -142,33 +165,61 @@ struct BLOCK TransferHostLocal = TransferHostLocalFun[psm]; TransferLocalHost = TransferLocalHostFun[psm]; } + + void SetTable(u32 psm) + { + switch (psm) { + case PSMCT32: + pageTable = &g_pageTable32[0][0]; + blockTable = &g_blockTable32[0][0]; + columnTable = &g_columnTable32[0][0]; + break; + case PSMT32Z: + pageTable = &g_pageTable32Z[0][0]; + blockTable = &g_blockTable32Z[0][0]; + columnTable = &g_columnTable32[0][0]; + break; + case PSMCT16: + pageTable = &g_pageTable16[0][0]; + blockTable = &g_blockTable16[0][0]; + columnTable = &g_columnTable16[0][0]; + break; + case PSMCT16S: + pageTable = &g_pageTable16S[0][0]; + blockTable = &g_blockTable16S[0][0]; + columnTable = &g_columnTable16[0][0]; + break; + case PSMT16Z: + pageTable = &g_pageTable16Z[0][0]; + blockTable = &g_blockTable16Z[0][0]; + columnTable = &g_columnTable16[0][0]; + break; + case PSMT16SZ: + pageTable = &g_pageTable16SZ[0][0]; + blockTable = &g_blockTable16SZ[0][0]; + columnTable = &g_columnTable16[0][0]; + break; + case PSMT8: + pageTable = &g_pageTable8[0][0]; + blockTable = &g_blockTable8[0][0]; + columnTable = &g_columnTable8[0][0]; + break; + case PSMT4: + pageTable = &g_pageTable4[0][0]; + blockTable = &g_blockTable4[0][0]; + columnTable = &g_columnTable4[0][0]; + break; + default: + pageTable = NULL; + blockTable = NULL; + columnTable = NULL; + break; + } + } }; extern BLOCK m_Blocks[]; -extern u32 g_blockTable32[4][8]; -extern u32 g_blockTable32Z[4][8]; -extern u32 g_blockTable16[8][4]; -extern u32 g_blockTable16S[8][4]; -extern u32 g_blockTable16Z[8][4]; -extern u32 g_blockTable16SZ[8][4]; -extern u32 g_blockTable8[4][8]; -extern u32 g_blockTable4[8][4]; - -extern u32 g_columnTable32[8][8]; -extern u32 g_columnTable16[8][16]; -extern u32 g_columnTable8[16][16]; -extern u32 g_columnTable4[16][32]; - -extern u32 g_pageTable32[32][64]; -extern u32 g_pageTable32Z[32][64]; -extern u32 g_pageTable16[64][64]; -extern u32 g_pageTable16S[64][64]; -extern u32 g_pageTable16Z[64][64]; -extern u32 g_pageTable16SZ[64][64]; -extern u32 g_pageTable8[64][128]; -extern u32 g_pageTable4[128][128]; - static __forceinline u32 getPixelAddress32(int x, int y, u32 bp, u32 bw) { u32 basepage = ((y >> 5) * (bw >> 6)) + (x >> 6);