From adf43a86ab781a16048f3a91399c45f169240a76 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Wed, 24 Mar 2010 08:46:18 +0000 Subject: [PATCH] zzogl-pg: A few more changes. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2764 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/zzogl-pg/opengl/Mem.h | 3 +- plugins/zzogl-pg/opengl/Mem_Swizzle.h | 234 ++++----- plugins/zzogl-pg/opengl/Mem_Transmit.h | 480 +++++++++--------- .../opengl/Win32/zerogsogl_2008.vcproj | 3 +- plugins/zzogl-pg/opengl/x86-32.asm | 24 +- 5 files changed, 372 insertions(+), 372 deletions(-) diff --git a/plugins/zzogl-pg/opengl/Mem.h b/plugins/zzogl-pg/opengl/Mem.h index 0780cd8540..9d6d628227 100644 --- a/plugins/zzogl-pg/opengl/Mem.h +++ b/plugins/zzogl-pg/opengl/Mem.h @@ -48,7 +48,8 @@ typedef void (__fastcall *_SwizzleBlock)(u8 *dst, u8 *src, int pitch, u32 WriteM // but this works for the moment. struct TransferData { - u32 widthlimit; + // Signed because Visual C++ is weird. + s32 widthlimit; u32 blockbits; u32 blockwidth; u32 blockheight; diff --git a/plugins/zzogl-pg/opengl/Mem_Swizzle.h b/plugins/zzogl-pg/opengl/Mem_Swizzle.h index 3dd648cd0a..588d1cafa5 100644 --- a/plugins/zzogl-pg/opengl/Mem_Swizzle.h +++ b/plugins/zzogl-pg/opengl/Mem_Swizzle.h @@ -1,123 +1,123 @@ -/* ZeroGS KOSMOS - * Copyright (C) 2005-2006 zerofrog@gmail.com - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - +/* ZeroGS KOSMOS + * Copyright (C) 2005-2006 zerofrog@gmail.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + #ifndef MEM_SWIZZLE_H_INCLUDED #define MEM_SWIZZLE_H_INCLUDED - -#include "GS.h" + +#include "GS.h" #include "Mem.h" - -// special swizzle macros - which I converted to functions. - -static __forceinline __fastcall void SwizzleBlock24(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) -{ - u8* pnewsrc = src; - u32* pblock = tempblock; - - for(int by = 0; by < 7; ++by, pblock += 8, pnewsrc += pitch-24) - { - for(int bx = 0; bx < 8; ++bx, pnewsrc += 3) - { - pblock[bx] = *(u32*)pnewsrc; - } - } - - for(int bx = 0; bx < 7; ++bx, pnewsrc += 3) - { - /* might be 1 byte out of bounds of GS memory */ - pblock[bx] = *(u32*)pnewsrc; - } - - /* do 3 bytes for the last copy */ - *((u8*)pblock+28) = pnewsrc[0]; - *((u8*)pblock+29) = pnewsrc[1]; - *((u8*)pblock+30) = pnewsrc[2]; - SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0x00ffffff); -} - -#define SwizzleBlock24u SwizzleBlock24 - -static __forceinline __fastcall void SwizzleBlock8H(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) -{ - u8* pnewsrc = src; - u32* pblock = tempblock; - - for(int by = 0; by < 8; ++by, pblock += 8, pnewsrc += pitch) - { - u32 u = *(u32*)pnewsrc; - pblock[0] = u<<24; - pblock[1] = u<<16; - pblock[2] = u<<8; - pblock[3] = u; - u = *(u32*)(pnewsrc+4); - pblock[4] = u<<24; - pblock[5] = u<<16; - pblock[6] = u<<8; - pblock[7] = u; - } - SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0xff000000); -} - -#define SwizzleBlock8Hu SwizzleBlock8H - -static __forceinline __fastcall void SwizzleBlock4HH(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) -{ - u8* pnewsrc = src; - u32* pblock = tempblock; - - for(int by = 0; by < 8; ++by, pblock += 8, pnewsrc += pitch) - { - u32 u = *(u32*)pnewsrc; - pblock[0] = u<<28; - pblock[1] = u<<24; - pblock[2] = u<<20; - pblock[3] = u<<16; - pblock[4] = u<<12; - pblock[5] = u<<8; - pblock[6] = u<<4; - pblock[7] = u; - } - SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0xf0000000); -} - -#define SwizzleBlock4HHu SwizzleBlock4HH - -static __forceinline __fastcall void SwizzleBlock4HL(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) -{ - u8* pnewsrc = src; - u32* pblock = tempblock; - - for(int by = 0; by < 8; ++by, pblock += 8, pnewsrc += pitch) - { - u32 u = *(u32*)pnewsrc; - pblock[0] = u<<24; - pblock[1] = u<<20; - pblock[2] = u<<16; - pblock[3] = u<<12; - pblock[4] = u<<8; - pblock[5] = u<<4; - pblock[6] = u; - pblock[7] = u>>4; - } - SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0x0f000000); -} - -#define SwizzleBlock4HLu SwizzleBlock4HL + +// special swizzle macros - which I converted to functions. + +static __forceinline void __fastcall SwizzleBlock24(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +{ + u8* pnewsrc = src; + u32* pblock = tempblock; + + for(int by = 0; by < 7; ++by, pblock += 8, pnewsrc += pitch-24) + { + for(int bx = 0; bx < 8; ++bx, pnewsrc += 3) + { + pblock[bx] = *(u32*)pnewsrc; + } + } + + for(int bx = 0; bx < 7; ++bx, pnewsrc += 3) + { + /* might be 1 byte out of bounds of GS memory */ + pblock[bx] = *(u32*)pnewsrc; + } + + /* do 3 bytes for the last copy */ + *((u8*)pblock+28) = pnewsrc[0]; + *((u8*)pblock+29) = pnewsrc[1]; + *((u8*)pblock+30) = pnewsrc[2]; + SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0x00ffffff); +} + +#define SwizzleBlock24u SwizzleBlock24 + +static __forceinline void __fastcall SwizzleBlock8H(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +{ + u8* pnewsrc = src; + u32* pblock = tempblock; + + for(int by = 0; by < 8; ++by, pblock += 8, pnewsrc += pitch) + { + u32 u = *(u32*)pnewsrc; + pblock[0] = u<<24; + pblock[1] = u<<16; + pblock[2] = u<<8; + pblock[3] = u; + u = *(u32*)(pnewsrc+4); + pblock[4] = u<<24; + pblock[5] = u<<16; + pblock[6] = u<<8; + pblock[7] = u; + } + SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0xff000000); +} + +#define SwizzleBlock8Hu SwizzleBlock8H + +static __forceinline void __fastcall SwizzleBlock4HH(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +{ + u8* pnewsrc = src; + u32* pblock = tempblock; + + for(int by = 0; by < 8; ++by, pblock += 8, pnewsrc += pitch) + { + u32 u = *(u32*)pnewsrc; + pblock[0] = u<<28; + pblock[1] = u<<24; + pblock[2] = u<<20; + pblock[3] = u<<16; + pblock[4] = u<<12; + pblock[5] = u<<8; + pblock[6] = u<<4; + pblock[7] = u; + } + SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0xf0000000); +} + +#define SwizzleBlock4HHu SwizzleBlock4HH + +static __forceinline void __fastcall SwizzleBlock4HL(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +{ + u8* pnewsrc = src; + u32* pblock = tempblock; + + for(int by = 0; by < 8; ++by, pblock += 8, pnewsrc += pitch) + { + u32 u = *(u32*)pnewsrc; + pblock[0] = u<<24; + pblock[1] = u<<20; + pblock[2] = u<<16; + pblock[3] = u<<12; + pblock[4] = u<<8; + pblock[5] = u<<4; + pblock[6] = u; + pblock[7] = u>>4; + } + SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0x0f000000); +} + +#define SwizzleBlock4HLu SwizzleBlock4HL #endif // MEM_SWIZZLE_H_INCLUDED diff --git a/plugins/zzogl-pg/opengl/Mem_Transmit.h b/plugins/zzogl-pg/opengl/Mem_Transmit.h index 3568cd4780..c3c02b25a7 100644 --- a/plugins/zzogl-pg/opengl/Mem_Transmit.h +++ b/plugins/zzogl-pg/opengl/Mem_Transmit.h @@ -1,245 +1,245 @@ #ifndef MEM_TRANSMIT_H_INCLUDED #define MEM_TRANSMIT_H_INCLUDED - + #include "GS.h" -#include "Mem.h" - -#define DSTPSM gs.dstbuf.psm -extern int tempX, tempY; -extern int pitch, area, fracX; -extern int nSize; -extern u8* pstart; - -// transfers whole rows -template -static __forceinline bool TransmitHostLocalY_(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *pbuf) -{ - assert( (nSize%widthlimit) == 0 && widthlimit <= 4 ); - if ((gs.imageEndX-gs.trxpos.dx) % widthlimit) - { - // GS_LOG("Bad Transmission! %d %d, psm: %d\n", gs.trxpos.dx, gs.imageEndX, DSTPSM); - - for(; tempY < endY; ++tempY) - { - for(; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, pbuf += 1) - { - /* write as many pixel at one time as possible */ - wp(pstart, tempX%2048, tempY%2048, pbuf[0], gs.dstbuf.bw); - } - } - } - for(; tempY < endY; ++tempY) - { - for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, pbuf += widthlimit) - { - - /* write as many pixel at one time as possible */ - if( nSize < widthlimit ) return false; - - wp(pstart, tempX%2048, tempY%2048, pbuf[0], gs.dstbuf.bw); - - if( widthlimit > 1 ) - { - wp(pstart, (tempX+1)%2048, tempY%2048, pbuf[1], gs.dstbuf.bw); - - if( widthlimit > 2 ) - { - wp(pstart, (tempX+2)%2048, tempY%2048, pbuf[2], gs.dstbuf.bw); - - if( widthlimit > 3 ) - { - wp(pstart, (tempX+3)%2048, tempY%2048, pbuf[3], gs.dstbuf.bw); - } - } - } - } - - if ( tempX >= gs.imageEndX ) - { - assert(tempX == gs.imageEndX); - tempX = gs.trxpos.dx; - } - else - { - assert( gs.imageTransfer == -1 || nSize*sizeof(T)/4 == 0 ); - return false; - } - } - return true; -} - -template -static __forceinline bool TransmitHostLocalX_(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *pbuf) -{ - for(int tempi = 0; tempi < blockheight; ++tempi) - { - for(tempX = startX; tempX < gs.imageEndX; tempX++, pbuf++) - { - wp(pstart, tempX%2048, (tempY+tempi)%2048, pbuf[0], gs.dstbuf.bw); - } - pbuf += pitch - fracX; - } - return true; -} - -// transfers whole rows -template -static __forceinline bool TransmitHostLocalY_24(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *pbuf) -{ - if (widthlimit != 8 || ((gs.imageEndX-gs.trxpos.dx)%widthlimit)) - { - //GS_LOG("Bad Transmission! %d %d, psm: %d\n", gs.trxpos.dx, gs.imageEndX, DSTPSM); - for(; tempY < endY; ++tempY) - { - for(; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, pbuf += 3) - { - wp(pstart, tempX%2048, tempY%2048, *(u32*)(pbuf), gs.dstbuf.bw); - } - - if( tempX >= gs.imageEndX ) - { - assert(gs.imageTransfer == -1 || tempX == gs.imageEndX); - tempX = gs.trxpos.dx; - } - else - { - assert( gs.imageTransfer == -1 || nSize == 0 ); - return false; - } - } - } - else - { - assert( /*(nSize%widthlimit) == 0 &&*/ widthlimit == 8 ); - for(; tempY < endY; ++tempY) - { - for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, pbuf += 3*widthlimit) - { - if (nSize < widthlimit) return false; - - /* write as many pixel at one time as possible */ - - wp(pstart, tempX%2048, tempY%2048, *(u32*)(pbuf+0), gs.dstbuf.bw); - wp(pstart, (tempX+1)%2048, tempY%2048, *(u32*)(pbuf+3), gs.dstbuf.bw); - wp(pstart, (tempX+2)%2048, tempY%2048, *(u32*)(pbuf+6), gs.dstbuf.bw); - wp(pstart, (tempX+3)%2048, tempY%2048, *(u32*)(pbuf+9), gs.dstbuf.bw); - wp(pstart, (tempX+4)%2048, tempY%2048, *(u32*)(pbuf+12), gs.dstbuf.bw); - wp(pstart, (tempX+5)%2048, tempY%2048, *(u32*)(pbuf+15), gs.dstbuf.bw); - wp(pstart, (tempX+6)%2048, tempY%2048, *(u32*)(pbuf+18), gs.dstbuf.bw); - wp(pstart, (tempX+7)%2048, tempY%2048, *(u32*)(pbuf+21), gs.dstbuf.bw); - } - - if (tempX >= gs.imageEndX) - { - assert(gs.imageTransfer == -1 || tempX == gs.imageEndX); - tempX = gs.trxpos.dx; - } - else - { - if ( nSize < 0 ) - { - /* extracted too much */ - assert( (nSize%3)==0 && nSize > -24 ); - tempX += nSize/3; - nSize = 0; - } - assert( gs.imageTransfer == -1 || nSize == 0 ); - return false; - } - } - } - return true; -} - -// transmit until endX, don't check size since it has already been prevalidated -template -static __forceinline bool TransmitHostLocalX_24(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *pbuf) -{ - for(int tempi = 0; tempi < blockheight; ++tempi) - { - for(tempX = startX; tempX < gs.imageEndX; tempX++, pbuf += 3) - { - wp(pstart, tempX%2048, (tempY+tempi)%2048, *(u32*)pbuf, gs.dstbuf.bw); - } - pbuf += 3*(pitch-fracX); - } - return true; -} - -// meant for 4bit transfers -template -static __forceinline bool TransmitHostLocalY_4(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *pbuf) -{ - for(; tempY < endY; ++tempY) - { - for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit) - { - /* write as many pixel at one time as possible */ - wp(pstart, tempX%2048, tempY%2048, *pbuf&0x0f, gs.dstbuf.bw); - wp(pstart, (tempX+1)%2048, tempY%2048, *pbuf>>4, gs.dstbuf.bw); - pbuf++; - if ( widthlimit > 2 ) - { - wp(pstart, (tempX+2)%2048, tempY%2048, *pbuf&0x0f, gs.dstbuf.bw); - wp(pstart, (tempX+3)%2048, tempY%2048, *pbuf>>4, gs.dstbuf.bw); - pbuf++; - - if( widthlimit > 4 ) - { - wp(pstart, (tempX+4)%2048, tempY%2048, *pbuf&0x0f, gs.dstbuf.bw); - wp(pstart, (tempX+5)%2048, tempY%2048, *pbuf>>4, gs.dstbuf.bw); - pbuf++; - - if( widthlimit > 6 ) - { - wp(pstart, (tempX+6)%2048, tempY%2048, *pbuf&0x0f, gs.dstbuf.bw); - wp(pstart, (tempX+7)%2048, tempY%2048, *pbuf>>4, gs.dstbuf.bw); - pbuf++; - } - } - } - } - - if ( tempX >= gs.imageEndX ) - { - tempX = gs.trxpos.dx; - } - else - { - assert( gs.imageTransfer == -1 || (nSize/32) == 0 ); - return false; - } - } - return true; -} - -// transmit until endX, don't check size since it has already been prevalidated -template -static __forceinline bool TransmitHostLocalX_4(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *pbuf) -{ - for(int tempi = 0; tempi < blockheight; ++tempi) - { - for(tempX = startX; tempX < gs.imageEndX; tempX+=2, pbuf++) - { - wp(pstart, tempX%2048, (tempY+tempi)%2048, pbuf[0]&0x0f, gs.dstbuf.bw); - wp(pstart, (tempX+1)%2048, (tempY+tempi)%2048, pbuf[0]>>4, gs.dstbuf.bw); - } - pbuf += (pitch-fracX)/2; - } - return true; -} - -// calculate pitch in source buffer -static __forceinline u32 TransPitch(u32 pitch, u32 size) -{ - return pitch * size / 8; -} - -static __forceinline u32 TransPitch2(u32 pitch, u32 size) -{ - if (size == 4) return pitch / 2; - if (size == 24) return pitch * 3; - return pitch; -} +#include "Mem.h" + +#define DSTPSM gs.dstbuf.psm +extern int tempX, tempY; +extern int pitch, area, fracX; +extern int nSize; +extern u8* pstart; + +// transfers whole rows +template +static __forceinline bool TransmitHostLocalY_(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *pbuf) +{ + assert( (nSize%widthlimit) == 0 && widthlimit <= 4 ); + if ((gs.imageEndX-gs.trxpos.dx) % widthlimit) + { + // GS_LOG("Bad Transmission! %d %d, psm: %d\n", gs.trxpos.dx, gs.imageEndX, DSTPSM); + + for(; tempY < endY; ++tempY) + { + for(; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, pbuf += 1) + { + /* write as many pixel at one time as possible */ + wp(pstart, tempX%2048, tempY%2048, pbuf[0], gs.dstbuf.bw); + } + } + } + for(; tempY < endY; ++tempY) + { + for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, pbuf += widthlimit) + { + + /* write as many pixel at one time as possible */ + if( nSize < widthlimit ) return false; + + wp(pstart, tempX%2048, tempY%2048, pbuf[0], gs.dstbuf.bw); + + if( widthlimit > 1 ) + { + wp(pstart, (tempX+1)%2048, tempY%2048, pbuf[1], gs.dstbuf.bw); + + if( widthlimit > 2 ) + { + wp(pstart, (tempX+2)%2048, tempY%2048, pbuf[2], gs.dstbuf.bw); + + if( widthlimit > 3 ) + { + wp(pstart, (tempX+3)%2048, tempY%2048, pbuf[3], gs.dstbuf.bw); + } + } + } + } + + if ( tempX >= gs.imageEndX ) + { + assert(tempX == gs.imageEndX); + tempX = gs.trxpos.dx; + } + else + { + assert( gs.imageTransfer == -1 || nSize*sizeof(T)/4 == 0 ); + return false; + } + } + return true; +} + +template +static __forceinline bool TransmitHostLocalX_(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *pbuf) +{ + for(u32 tempi = 0; tempi < blockheight; ++tempi) + { + for(tempX = startX; tempX < gs.imageEndX; tempX++, pbuf++) + { + wp(pstart, tempX%2048, (tempY+tempi)%2048, pbuf[0], gs.dstbuf.bw); + } + pbuf += pitch - fracX; + } + return true; +} + +// transfers whole rows +template +static __forceinline bool TransmitHostLocalY_24(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *pbuf) +{ + if (widthlimit != 8 || ((gs.imageEndX-gs.trxpos.dx)%widthlimit)) + { + //GS_LOG("Bad Transmission! %d %d, psm: %d\n", gs.trxpos.dx, gs.imageEndX, DSTPSM); + for(; tempY < endY; ++tempY) + { + for(; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, pbuf += 3) + { + wp(pstart, tempX%2048, tempY%2048, *(u32*)(pbuf), gs.dstbuf.bw); + } + + if( tempX >= gs.imageEndX ) + { + assert(gs.imageTransfer == -1 || tempX == gs.imageEndX); + tempX = gs.trxpos.dx; + } + else + { + assert( gs.imageTransfer == -1 || nSize == 0 ); + return false; + } + } + } + else + { + assert( /*(nSize%widthlimit) == 0 &&*/ widthlimit == 8 ); + for(; tempY < endY; ++tempY) + { + for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, pbuf += 3*widthlimit) + { + if (nSize < widthlimit) return false; + + /* write as many pixel at one time as possible */ + + wp(pstart, tempX%2048, tempY%2048, *(u32*)(pbuf+0), gs.dstbuf.bw); + wp(pstart, (tempX+1)%2048, tempY%2048, *(u32*)(pbuf+3), gs.dstbuf.bw); + wp(pstart, (tempX+2)%2048, tempY%2048, *(u32*)(pbuf+6), gs.dstbuf.bw); + wp(pstart, (tempX+3)%2048, tempY%2048, *(u32*)(pbuf+9), gs.dstbuf.bw); + wp(pstart, (tempX+4)%2048, tempY%2048, *(u32*)(pbuf+12), gs.dstbuf.bw); + wp(pstart, (tempX+5)%2048, tempY%2048, *(u32*)(pbuf+15), gs.dstbuf.bw); + wp(pstart, (tempX+6)%2048, tempY%2048, *(u32*)(pbuf+18), gs.dstbuf.bw); + wp(pstart, (tempX+7)%2048, tempY%2048, *(u32*)(pbuf+21), gs.dstbuf.bw); + } + + if (tempX >= gs.imageEndX) + { + assert(gs.imageTransfer == -1 || tempX == gs.imageEndX); + tempX = gs.trxpos.dx; + } + else + { + if ( nSize < 0 ) + { + /* extracted too much */ + assert( (nSize%3)==0 && nSize > -24 ); + tempX += nSize/3; + nSize = 0; + } + assert( gs.imageTransfer == -1 || nSize == 0 ); + return false; + } + } + } + return true; +} + +// transmit until endX, don't check size since it has already been prevalidated +template +static __forceinline bool TransmitHostLocalX_24(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *pbuf) +{ + for(u32 tempi = 0; tempi < blockheight; ++tempi) + { + for(tempX = startX; tempX < gs.imageEndX; tempX++, pbuf += 3) + { + wp(pstart, tempX%2048, (tempY+tempi)%2048, *(u32*)pbuf, gs.dstbuf.bw); + } + pbuf += 3*(pitch-fracX); + } + return true; +} + +// meant for 4bit transfers +template +static __forceinline bool TransmitHostLocalY_4(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *pbuf) +{ + for(; tempY < endY; ++tempY) + { + for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit) + { + /* write as many pixel at one time as possible */ + wp(pstart, tempX%2048, tempY%2048, *pbuf&0x0f, gs.dstbuf.bw); + wp(pstart, (tempX+1)%2048, tempY%2048, *pbuf>>4, gs.dstbuf.bw); + pbuf++; + if ( widthlimit > 2 ) + { + wp(pstart, (tempX+2)%2048, tempY%2048, *pbuf&0x0f, gs.dstbuf.bw); + wp(pstart, (tempX+3)%2048, tempY%2048, *pbuf>>4, gs.dstbuf.bw); + pbuf++; + + if( widthlimit > 4 ) + { + wp(pstart, (tempX+4)%2048, tempY%2048, *pbuf&0x0f, gs.dstbuf.bw); + wp(pstart, (tempX+5)%2048, tempY%2048, *pbuf>>4, gs.dstbuf.bw); + pbuf++; + + if( widthlimit > 6 ) + { + wp(pstart, (tempX+6)%2048, tempY%2048, *pbuf&0x0f, gs.dstbuf.bw); + wp(pstart, (tempX+7)%2048, tempY%2048, *pbuf>>4, gs.dstbuf.bw); + pbuf++; + } + } + } + } + + if ( tempX >= gs.imageEndX ) + { + tempX = gs.trxpos.dx; + } + else + { + assert( gs.imageTransfer == -1 || (nSize/32) == 0 ); + return false; + } + } + return true; +} + +// transmit until endX, don't check size since it has already been prevalidated +template +static __forceinline bool TransmitHostLocalX_4(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *pbuf) +{ + for(u32 tempi = 0; tempi < blockheight; ++tempi) + { + for(tempX = startX; tempX < gs.imageEndX; tempX+=2, pbuf++) + { + wp(pstart, tempX%2048, (tempY+tempi)%2048, pbuf[0]&0x0f, gs.dstbuf.bw); + wp(pstart, (tempX+1)%2048, (tempY+tempi)%2048, pbuf[0]>>4, gs.dstbuf.bw); + } + pbuf += (pitch-fracX)/2; + } + return true; +} + +// calculate pitch in source buffer +static __forceinline u32 TransPitch(u32 pitch, u32 size) +{ + return pitch * size / 8; +} + +static __forceinline u32 TransPitch2(u32 pitch, u32 size) +{ + if (size == 4) return pitch / 2; + if (size == 24) return pitch * 3; + return pitch; +} #endif // MEM_TRANSMIT_H_INCLUDED diff --git a/plugins/zzogl-pg/opengl/Win32/zerogsogl_2008.vcproj b/plugins/zzogl-pg/opengl/Win32/zerogsogl_2008.vcproj index 74ce2d3ad4..23de8041dd 100644 --- a/plugins/zzogl-pg/opengl/Win32/zerogsogl_2008.vcproj +++ b/plugins/zzogl-pg/opengl/Win32/zerogsogl_2008.vcproj @@ -225,7 +225,7 @@ RuntimeLibrary="0" BufferSecurityCheck="false" PrecompiledHeaderFile="" - WarningLevel="3" + WarningLevel="1" SuppressStartupBanner="true" DebugInformationFormat="3" /> @@ -451,7 +451,6 @@ RelativePath="..\Mem_Transmit.h" > - diff --git a/plugins/zzogl-pg/opengl/x86-32.asm b/plugins/zzogl-pg/opengl/x86-32.asm index 1507d2192d..5a006cc262 100644 --- a/plugins/zzogl-pg/opengl/x86-32.asm +++ b/plugins/zzogl-pg/opengl/x86-32.asm @@ -193,7 +193,7 @@ SwizzleBlock32_sse2@WM: ; SwizzleBlock16 ; -@SwizzleBlock16_sse2@12 proc public +@SwizzleBlock16_sse2@16 proc public push ebx @@ -225,13 +225,13 @@ SwizzleBlock32_sse2@WM: ret 4 -@SwizzleBlock16_sse2@12 endp +@SwizzleBlock16_sse2@16 endp ; ; SwizzleBlock8 ; -@SwizzleBlock8_sse2@12 proc public +@SwizzleBlock8_sse2@16 proc public push ebx @@ -287,13 +287,13 @@ SwizzleBlock32_sse2@WM: ret 4 -@SwizzleBlock8_sse2@12 endp +@SwizzleBlock8_sse2@16 endp ; ; SwizzleBlock4 ; -@SwizzleBlock4_sse2@12 proc public +@SwizzleBlock4_sse2@16 proc public push ebx @@ -365,7 +365,7 @@ SwizzleBlock32_sse2@WM: ret 4 -@SwizzleBlock4_sse2@12 endp +@SwizzleBlock4_sse2@16 endp ; ; swizzling with unaligned reads @@ -471,7 +471,7 @@ SwizzleBlock32u_sse2@WM: ; SwizzleBlock16u ; -@SwizzleBlock16u_sse2@12 proc public +@SwizzleBlock16u_sse2@16 proc public push ebx @@ -503,13 +503,13 @@ SwizzleBlock32u_sse2@WM: ret 4 -@SwizzleBlock16u_sse2@12 endp +@SwizzleBlock16u_sse2@16 endp ; ; SwizzleBlock8u ; -@SwizzleBlock8u_sse2@12 proc public +@SwizzleBlock8u_sse2@16 proc public push ebx @@ -569,13 +569,13 @@ SwizzleBlock32u_sse2@WM: ret 4 -@SwizzleBlock8u_sse2@12 endp +@SwizzleBlock8u_sse2@16 endp ; ; SwizzleBlock4u ; -@SwizzleBlock4u_sse2@12 proc public +@SwizzleBlock4u_sse2@16 proc public push ebx @@ -647,6 +647,6 @@ SwizzleBlock32u_sse2@WM: ret 4 -@SwizzleBlock4u_sse2@12 endp +@SwizzleBlock4u_sse2@16 endp end \ No newline at end of file