// Review notes for this patch (unified git diff, four files under plugins/zzogl-pg/opengl).
// The patch text below is unchanged; only this header was added.
//
// HostMemory.cpp
//   * Adds ZZLog::Error_Log("gs.imageY >= gs.imageEndY!") inside the
//     `if (gs.imageY >= gs.imageEndY)` branch, ahead of the existing assert.
//     NOTE(review): the assert expects gs.imageY == gs.imageEndY, so the error
//     message is also emitted in the equality case the assert treats as valid -
//     confirm this is intended and not leftover debugging output.
//   * _TransferLocalLocal / _TransferLocalLocal_4: drops the trailing
//     commented-out *Function_0 calls and hoists the loop bounds into
//     u32 maxX / maxY computed once before the loops.
//     NOTE(review): the loop counters i and j are `int` while maxX/maxY are
//     u32, so the loop conditions are now signed/unsigned comparisons; the
//     types of gs.trxpos.* and gs.imageWnew/Hnew are not visible here - verify.
//   * _TransferLocalLocal: the (j+1) copy moves out of a bare block (its
//     `if (widthlimit > 1)` was already commented out) to loop-body level, and
//     the (j+2)/(j+3) copies share one `if (widthlimit > 2)`. The old inner
//     `if (widthlimit > 3)` was also commented out, so the same copies run.
//     NOTE(review): wp is taken from writePixelFun_0[gs.srcbuf.psm] but writes
//     to pDstBuf with gs.dstbuf.bw, and rp is taken from
//     readPixelFun_0[gs.dstbuf.psm] but reads pSrcBuf with gs.srcbuf.bw. The
//     psm indices look swapped; this patch keeps them as they were - TODO confirm.
//     NOTE(review): `(gs.imageWnew & widthlimit) != 0` tests a single bit, not
//     a `widthlimit - 1` mask - presumably a multiple-of-widthlimit check;
//     verify intent.
//
// Mem.h
//   * Removes getPixelFunction_0, writePixelFunction_0 and readPixelFunction_0.
//     Each only returned the matching table entry (getPixelFun_0[psm],
//     writePixelFun_0[psm], readPixelFun_0[psm]); the switch bodies were
//     already commented out. Also removes two blank lines after the
//     getPixelAddress macros.
//     NOTE(review): callers outside this diff are not visible - confirm none
//     still reference the removed helpers.
//
// targets.cpp
//   * Comment-only: fixes "resise" -> "resize" and adds a note that the
//     misalignment often happens when changing AA.
//
// zerogs.cpp (ZeroGS::KickSprite)
//   * Replaces `if (s_AAx) { x += 4; if (s_AAy) y += 4; }` with
//     `x += (4*s_AAx); y += (4*s_AAy);`.
//     NOTE(review): this is a behaviour change, not a pure cleanup: y is now
//     adjusted even when s_AAx is 0, and both offsets scale with the AA value
//     instead of being a fixed 4. The value ranges of s_AAx/s_AAy are not
//     visible here - verify.
diff --git a/plugins/zzogl-pg/opengl/HostMemory.cpp b/plugins/zzogl-pg/opengl/HostMemory.cpp index 5aaa23ed89..2f45eab73d 100644 --- a/plugins/zzogl-pg/opengl/HostMemory.cpp +++ b/plugins/zzogl-pg/opengl/HostMemory.cpp @@ -327,6 +327,7 @@ if (gs.imageY >= gs.imageEndY) { + ZZLog::Error_Log("gs.imageY >= gs.imageEndY!"); assert(gs.imageY == gs.imageEndY); gs.imageTransfer = -1; } @@ -335,38 +336,35 @@ __forceinline void _TransferLocalLocal() { //ZZLog::Error_Log("TransferLocalLocal(0x%x, 0x%x)", gs.srcbuf.psm, gs.dstbuf.psm); - _writePixel_0 wp = writePixelFun_0[gs.srcbuf.psm]; //writePixelFunction_0(gs.srcbuf.psm); - _readPixel_0 rp = readPixelFun_0[gs.dstbuf.psm]; //readPixelFunction_0(gs.dstbuf.psm); + _writePixel_0 wp = writePixelFun_0[gs.srcbuf.psm]; + _readPixel_0 rp = readPixelFun_0[gs.dstbuf.psm]; u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256; u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256; u32 widthlimit = 4; + u32 maxX = gs.trxpos.sx + gs.imageWnew; + u32 maxY = gs.trxpos.sy + gs.imageHnew; if (PSMT_BITMODE(gs.srcbuf.psm) == 0) widthlimit = 2; if ((gs.imageWnew & widthlimit) != 0) return; - for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < gs.trxpos.sy+gs.imageHnew; i++, i2++) + for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < maxY; i++, i2++) { - for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < gs.trxpos.sx+gs.imageWnew; j+=widthlimit, j2+=widthlimit) + for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < maxX; j += widthlimit, j2 += widthlimit) { wp(pDstBuf, j2%2048, i2%2048, - rp(pSrcBuf, j%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); + rp(pSrcBuf, j%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); + + wp(pDstBuf, (j2+1)%2048, i2%2048, + rp(pSrcBuf, (j+1)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); - //if (widthlimit > 1) + if (widthlimit > 2) { - wp(pDstBuf, (j2+1)%2048, i2%2048, - rp(pSrcBuf, (j+1)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); - - if (widthlimit > 2) - { - wp(pDstBuf, (j2+2)%2048, i2%2048, - rp(pSrcBuf, (j+2)%2048, i%2048, gs.srcbuf.bw), 
gs.dstbuf.bw); - - //if (widthlimit > 3) - { - wp(pDstBuf, (j2+3)%2048, i2%2048, - rp(pSrcBuf, (j+3)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); - } - } + // Then widthlimit == 4. + wp(pDstBuf, (j2+2)%2048, i2%2048, + rp(pSrcBuf, (j+2)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); + + wp(pDstBuf, (j2+3)%2048, i2%2048, + rp(pSrcBuf, (j+3)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); } } } @@ -375,16 +373,18 @@ __forceinline void _TransferLocalLocal() __forceinline void _TransferLocalLocal_4() { //ZZLog::Error_Log("TransferLocalLocal_4(0x%x, 0x%x)", gs.srcbuf.psm, gs.dstbuf.psm); - _getPixelAddress_0 gsp = getPixelFun_0[gs.srcbuf.psm]; //getPixelFunction_0(gs.srcbuf.psm); - _getPixelAddress_0 gdp = getPixelFun_0[gs.dstbuf.psm]; //getPixelFunction_0(gs.dstbuf.psm); + _getPixelAddress_0 gsp = getPixelFun_0[gs.srcbuf.psm]; + _getPixelAddress_0 gdp = getPixelFun_0[gs.dstbuf.psm]; u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256; u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256; + u32 maxX = gs.trxpos.sx + gs.imageWnew; + u32 maxY = gs.trxpos.sy + gs.imageHnew; assert((gs.imageWnew % 8) == 0); - for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < gs.trxpos.sy + gs.imageHnew; ++i, ++i2) + for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < maxY; ++i, ++i2) { - for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < gs.trxpos.sx + gs.imageWnew; j += 8, j2 += 8) + for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < maxX; j += 8, j2 += 8) { /* NOTE: the 2 conseq 4bit values are in NOT in the same byte */ u32 read = gsp(j%2048, i%2048, gs.srcbuf.bw); diff --git a/plugins/zzogl-pg/opengl/Mem.h b/plugins/zzogl-pg/opengl/Mem.h index 17cd34a1fb..7b347078fc 100644 --- a/plugins/zzogl-pg/opengl/Mem.h +++ b/plugins/zzogl-pg/opengl/Mem.h @@ -270,33 +270,9 @@ static __forceinline u32 getPixelAddress16SZ_0(int x, int y, u32 bw) return word; } -static __forceinline _getPixelAddress_0 getPixelFunction_0(u32 psm) -{ - return getPixelFun_0[psm]; - /*switch(psm) - { - case PSMCT32: return getPixelAddress32_0; 
- case PSMCT24: return getPixelAddress24_0; - case PSMCT16: return getPixelAddress16_0; - case PSMCT16S: return getPixelAddress16S_0; - case PSMT8: return getPixelAddress8_0; - case PSMT4: return getPixelAddress4_0; - case PSMT8H: return getPixelAddress8H_0; - case PSMT4HL: return getPixelAddress4HL_0; - case PSMT4HH: return getPixelAddress4HH_0; - case PSMT32Z: return getPixelAddress32Z_0; - case PSMT24Z: return getPixelAddress24Z_0; - case PSMT16Z: return getPixelAddress16Z_0; - case PSMT16SZ: return getPixelAddress16SZ_0; - default: return getPixelAddress32_0; - }*/ -} - #define getPixelAddress_0(psm,x,y,bw) getPixelAddress##psm##_0(x,y,bw) #define getPixelAddress(psm,x,y,bp,bw) getPixelAddress##psm##(x,y,bp,bw) - - static __forceinline void writePixel32(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) { ((u32*)pmem)[getPixelAddress32(x, y, bp, bw)] = pixel; @@ -537,29 +513,6 @@ static __forceinline void writePixel16SZ_0(void* pmem, int x, int y, u32 pixel, ((u16*)pmem)[getPixelAddress16SZ_0(x, y, bw)] = pixel; } - -static __forceinline _writePixel_0 writePixelFunction_0(u32 psm) -{ - return writePixelFun_0[psm]; - /*switch(psm) - { - case PSMCT32: return writePixel32_0; - case PSMCT24: return writePixel24_0; - case PSMCT16: return writePixel16_0; - case PSMCT16S: return writePixel16S_0; - case PSMT8: return writePixel8_0; - case PSMT4: return writePixel4_0; - case PSMT8H: return writePixel8H_0; - case PSMT4HL: return writePixel4HL_0; - case PSMT4HH: return writePixel4HH_0; - case PSMT32Z: return writePixel32Z_0; - case PSMT24Z: return writePixel24Z_0; - case PSMT16Z: return writePixel16Z_0; - case PSMT16SZ: return writePixel16SZ_0; - default: return writePixel32_0; - }*/ -} - /////////////// static __forceinline u32 readPixel32_0(const void* pmem, int x, int y, u32 bw) @@ -637,25 +590,4 @@ static __forceinline u32 readPixel16SZ_0(const void* pmem, int x, int y, u32 bw) return ((const u16*)pmem)[getPixelAddress16SZ_0(x, y, bw)]; } -static __forceinline 
_readPixel_0 readPixelFunction_0(u32 psm) -{ - return readPixelFun_0[psm]; - /*switch(psm) - { - case PSMCT32: return readPixel32_0; - case PSMCT24: return readPixel24_0; - case PSMCT16: return readPixel16_0; - case PSMCT16S: return readPixel16S_0; - case PSMT8: return readPixel8_0; - case PSMT4: return readPixel4_0; - case PSMT8H: return readPixel8H_0; - case PSMT4HL: return readPixel4HL_0; - case PSMT4HH: return readPixel4HH_0; - case PSMT32Z: return readPixel32Z_0; - case PSMT24Z: return readPixel24Z_0; - case PSMT16Z: return readPixel16Z_0; - case PSMT16SZ: return readPixel16SZ_0; - default: return readPixel32_0; - }*/ -} #endif /* __MEM_H__ */ diff --git a/plugins/zzogl-pg/opengl/targets.cpp b/plugins/zzogl-pg/opengl/targets.cpp index 391452608c..d24376eb38 100644 --- a/plugins/zzogl-pg/opengl/targets.cpp +++ b/plugins/zzogl-pg/opengl/targets.cpp @@ -2286,10 +2286,12 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info { // This is not unusual situation, when vector does not 16bit alignment, that is destructive for SSE2 // instruction movdqa [%eax], xmm0 - // The idea would be resise vector to 15 elements, that set ptxedata to aligned position. + // The idea would be resize vector to 15 elements, that set ptxedata to aligned position. // Later we would move eax by 16, so only we should verify is first element align // FIXME. As I see, texdata used only once here, it does not have any impact on other code. // Probably, usage of _aligned_maloc() would be preferable. + + // Note: this often happens when changing AA. int disalignment = 16 - ((u32)(uptr)dst) % 16; // This is value of shift. 
It could be 0 < disalignment <= 15 ptexdata = &texdata[disalignment]; // Set pointer to aligned element dst = (u16*)ptexdata; diff --git a/plugins/zzogl-pg/opengl/zerogs.cpp b/plugins/zzogl-pg/opengl/zerogs.cpp index 2d04ed3e27..c311e7771b 100644 --- a/plugins/zzogl-pg/opengl/zerogs.cpp +++ b/plugins/zzogl-pg/opengl/zerogs.cpp @@ -803,15 +803,10 @@ void ZeroGS::KickSprite() int next = (gs.primIndex + 1) % ARRAY_SIZE(gs.gsvertex); int last = (gs.primIndex + 2) % ARRAY_SIZE(gs.gsvertex); - - // sprite is too small and AA shows lines (tek4) - - if (s_AAx) - { - gs.gsvertex[last].x += 4; - - if (s_AAy) gs.gsvertex[last].y += 4; - } + + // sprite is too small and AA shows lines (tek4, Mana Khemia) + gs.gsvertex[last].x += (4*s_AAx); + gs.gsvertex[last].y += (4*s_AAy); // might be bad sprite (KH dialog text) //if( gs.gsvertex[next].x == gs.gsvertex[last].x || gs.gsvertex[next].y == gs.gsvertex[last].y )