/* ZeroGS * Copyright (C) 2005-2006 zerofrog@gmail.com * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ #if defined(_WIN32) || defined(__WIN32__) #include #include #include #endif #include #include #include #include #include #include #include "GS.h" #include "Mem.h" #include "x86.h" #include "Regs.h" #include "zerogs.h" #include "resource.h" #include "zerogsshaders/zerogsshaders.h" #include "targets.h" #define DEBUG_PS2 0 #define POINT_BUFFERFLUSH 512 #define POINT_BUFFERSIZE (1<<18) #define MINMAX_SHIFT 3 #define MAX_ACTIVECLUTS 16 #define ZEROGS_SAVEVER 0xaa000005 #define STENCIL_ALPHABIT 1 // if set, dest alpha >= 0x80 #define STENCIL_PIXELWRITE 2 // if set, pixel just written (reset after every Flush) #define STENCIL_FBA 4 // if set, just written pixel's alpha >= 0 (reset after every Flush) #define STENCIL_SPECIAL 8 // if set, indicates that pixel passed its alpha test (reset after every Flush) //#define STENCIL_PBE 16 #define STENCIL_CLEAR (2|4|8|16) #define VBSAVELIMIT ((u32)((u8*)&vb[0].nNextFrameHeight-(u8*)&vb[0])) using namespace ZeroGS; static LPDIRECT3D9 pD3D = NULL; // Used to create the D3DDevice LPDIRECT3DDEVICE9 pd3dDevice = NULL; static DXVEC4 s_vznorm; extern u32 g_nGenVars, g_nTexVars, g_nAlphaVars, g_nResolve; extern char *libraryName; extern int g_nFrame, g_nRealFrame; extern float fFPS; extern 
unsigned char revision, build, minor; BOOL g_bDisplayMsg = 1; extern HINSTANCE hInst; BOOL g_bSaveFlushedFrame = 1; BOOL g_bIsLost = 0; int g_nFrameRender = 10; int g_nFramesSkipped = 0; int g_MaxRenderedHeight = 0; #ifdef RELEASE_TO_PUBLIC #define INC_GENVARS() #define INC_TEXVARS() #define INC_ALPHAVARS() #define INC_RESOLVE() #define g_bUpdateEffect 0 #define g_bWriteProfile 0 #define g_bSaveTex 0 #define g_bSaveTrans 0 #define g_bSaveFrame 0 #define g_bSaveFinalFrame 0 #define g_bUpdateStencil 1 #define g_bSaveResolved 0 #else #define INC_GENVARS() ++g_nGenVars #define INC_TEXVARS() ++g_nTexVars #define INC_ALPHAVARS() ++g_nAlphaVars #define INC_RESOLVE() ++g_nResolve BOOL g_bSaveTrans = 0; BOOL g_bUpdateEffect = 0; BOOL g_bWriteProfile = 0; BOOL g_bSaveTex = 0; // saves the curent texture BOOL g_bSaveFrame = 0; // saves the current psurfTarget BOOL g_bSaveFinalFrame = 0; // saves the input to the CRTC BOOL g_bUpdateStencil = 1; // only needed for dest alpha test (unfortunately, it has to be on all the time) BOOL g_bSaveResolved = 0; #endif #define DRAW() pd3dDevice->DrawPrimitive(primtype[curvb.curprim.prim], 0, curvb.dwCount) extern int s_frameskipping; //inline void SetRenderTarget_(int index, LPD3DSURF psurf, int counter, const char* pname) //{ // static LPD3DSURF ptargs[4] = {NULL}; // static int counters[4] = {0}; // static const char* pnames[4] = {NULL}; // // if( ptargs[index] == psurf && psurf != NULL ) // DEBUG_LOG("duplicate targets\n"); // pd3dDevice->SetRenderTarget(index, psurf); // ptargs[index] = psurf; // counters[index] = counter; // pnames[index] = pname; //} // //#define SetRenderTarget(index, psurf) SetRenderTarget_(index, psurf, __COUNTER__, __FUNCTION__) static u32 g_SaveFrameNum = 0; BOOL g_bMakeSnapshot = 0; string strSnapshot; int GPU_TEXWIDTH = 512; float g_fiGPU_TEXWIDTH = 1/512.0f; int g_MaxTexWidth = 4096, g_MaxTexHeight = 4096; // AVI Capture static int s_aviinit = 0; static int s_avicapturing = 0; static LPD3DSURF 
s_ptexAVICapture = NULL; // system memory texture const u32 g_primmult[8] = { 1, 2, 2, 3, 3, 3, 2, 0xff }; const u32 g_primsub[8] = { 1, 2, 1, 3, 1, 1, 2, 0 }; inline DWORD FtoDW(float f) { return (*((DWORD*)&f)); } float g_fBlockMult = 1; static int s_nFullscreen = 0; int g_nDepthUpdateCount = 0; int g_nDepthBias = 0; // Consts static const D3DPRIMITIVETYPE primtype[8] = { D3DPT_POINTLIST, D3DPT_LINELIST, D3DPT_LINELIST, D3DPT_TRIANGLELIST, D3DPT_TRIANGLELIST, D3DPT_TRIANGLELIST, D3DPT_TRIANGLELIST, D3DPT_FORCE_DWORD }; static const DWORD blendalpha[3] = { D3DBLEND_SRCALPHA, D3DBLEND_DESTALPHA, D3DBLEND_BLENDFACTOR }; static const DWORD blendinvalpha[3] = { D3DBLEND_INVSRCALPHA, D3DBLEND_INVDESTALPHA, D3DBLEND_INVBLENDFACTOR }; static const int PRIMMASK = 0x0e; // for now ignore 0x10 (AA) static const DWORD g_dwAlphaCmp[] = { D3DCMP_NEVER, D3DCMP_ALWAYS, D3DCMP_LESS, D3DCMP_LESSEQUAL, D3DCMP_EQUAL, D3DCMP_GREATEREQUAL, D3DCMP_GREATER, D3DCMP_NOTEQUAL }; // used for afail case static const DWORD g_dwReverseAlphaCmp[] = { D3DCMP_ALWAYS, D3DCMP_NEVER, D3DCMP_GREATEREQUAL, D3DCMP_GREATER, D3DCMP_NOTEQUAL, D3DCMP_LESS, D3DCMP_LESSEQUAL, D3DCMP_EQUAL }; static const DWORD g_dwZCmp[] = { D3DCMP_NEVER, D3DCMP_ALWAYS, D3DCMP_GREATEREQUAL, D3DCMP_GREATER }; ///////////////////// // graphics resources static LPD3DDECL pdecl = NULL; static LPD3DVS pvs[16] = {NULL}; static LPD3DPS ppsRegular[4] = {NULL}, ppsTexture[NUM_SHADERS] = {NULL}; static LPD3DPS ppsCRTC[2] = {NULL}, ppsCRTC24[2] = {NULL}, ppsCRTCTarg[2] = {NULL}; int g_nPixelShaderVer = SHADER_30; // default static BYTE* s_lpShaderResources = NULL; static map mapShaderResources; LPD3DTEX s_ptexCurSet[2] = {NULL}; #define s_bForceTexFlush 1 static LPD3DTEX s_ptexNextSet[2] = {NULL}; static ID3DXFont* pFont = NULL; static ID3DXSprite* pSprite = NULL; static LPD3DSURF psurfOrgTarg = NULL, psurfOrgDepth = NULL; LPD3DTEX ptexBlocks = NULL, ptexConv16to32 = NULL; // holds information on block tiling LPD3DTEX 
ptexBilinearBlocks = NULL; IDirect3DVolumeTexture9* ptexConv32to16 = NULL; static LPD3DTEX s_ptexInterlace = NULL; // holds interlace fields static int s_nInterlaceTexWidth = 0; // width of texture static list s_vecTempTextures; // temporary textures, released at the end of every frame static BOOL s_bTexFlush = FALSE; static LPD3DTEX ptexLogo = NULL; static BOOL s_bWriteDepth = FALSE; static BOOL s_bDestAlphaTest = FALSE; static int s_nLastResolveReset = 0; static int s_nResolveCounts[30] = {0}; // resolve counts for last 30 frames static int s_nCurResolveIndex = 0; int s_nResolved = 0; // number of targets resolved this frame int g_nDepthUsed = 0; // ffx2 pal movies static int s_nWriteDepthCount = 0; static int s_nWireframeCount = 0; static int s_nWriteDestAlphaTest = 0; //////////////////// // State parameters static float fiRendWidth, fiRendHeight; static DWORD dwStencilRef, dwStencilMask; static DXVEC4 vAlphaBlendColor; // used for GPU_COLOR static BYTE bNeedBlendFactorInAlpha; // set if the output source alpha is different from the real source alpha (only when blend factor > 0x80) static DWORD s_dwColorWrite = 0xf; // the color write mask of the current target BOOL g_bDisplayFPS = FALSE; union { struct { BYTE _bNeedAlphaColor; // set if vAlphaBlendColor needs to be set BYTE _b2XAlphaTest; // Only valid when bNeedAlphaColor is set. if 1st bit set set, double all alpha testing values // otherwise alpha testing needs to be done separately. BYTE _bDestAlphaColor; // set to 1 if blending with dest color (process only one tri at a time). If 2, dest alpha is always 1. 
BYTE _bAlphaClamping; // if first bit is set, do min; if second bit, do max }; u32 _bAlphaState; } g_vars; #define bNeedAlphaColor g_vars._bNeedAlphaColor #define b2XAlphaTest g_vars._b2XAlphaTest #define bDestAlphaColor g_vars._bDestAlphaColor #define bAlphaClamping g_vars._bAlphaClamping int g_PrevBitwiseTexX = -1, g_PrevBitwiseTexY = -1; // textures stored in SAMP_BITWISEANDX and SAMP_BITWISEANDY // stores the buffers for the last RenderCRTC const float g_filog32 = 0.999f / (32.0f * logf(2.0f)); static BOOL s_bAlphaSet = FALSE; static alphaInfo s_alphaInfo; namespace ZeroGS { VB vb[2]; float fiTexWidth[2], fiTexHeight[2]; // current tex width and height LONG width, height; u8* g_pbyGSMemory = NULL; // 4Mb GS system mem u8* g_pbyGSClut = NULL; D3DPRESENT_PARAMETERS d3dpp; BYTE s_AAx = 0, s_AAy = 0; // if AAy is set, then AAx has to be set BYTE bIndepWriteMasks = 1; BOOL s_bBeginScene = FALSE; D3DFORMAT g_RenderFormat = D3DFMT_A16B16G16R16F; int icurctx = -1; LPD3DVB pvbRect = NULL; DXVEC4 g_vdepth = DXVEC4(65536.0f, 256.0f, 1.0f, 65536.0f*256.0f); LPD3DVS pvsBitBlt = NULL, pvsBitBlt30 = NULL; LPD3DPS ppsBitBlt[2] = {NULL}, ppsBitBltDepth[2] = {NULL}, ppsBitBltDepthTex[2] = {NULL}, ppsOne = NULL; LPD3DPS ppsBaseTexture = NULL, ppsConvert16to32 = NULL, ppsConvert32to16 = NULL; extern CRangeManager s_RangeMngr; // manages overwritten memory void FlushTransferRanges(const tex0Info* ptex); // returns the first and last addresses aligned to a page that cover void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h, int bp, int bw); HRESULT LoadEffects(); HRESULT LoadExtraEffects(); LPD3DPS LoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context); static int s_nNewWidth = -1, s_nNewHeight = -1; void ChangeDeviceSize(int nNewWidth, int nNewHeight); void ProcessMessages(); void RenderCustom(float fAlpha); // intro anim struct MESSAGE { MESSAGE() {} MESSAGE(const char* p, DWORD dw) { 
strcpy(str, p); dwTimeStamp = dw; } char str[255]; DWORD dwTimeStamp; }; static list listMsgs; /////////////////////// // Method Prototypes // /////////////////////// void AdjustTransToAspect(DXVEC4& v, int dispwidth, int dispheight); void KickPoint(); void KickLine(); void KickTriangle(); void KickTriangleFan(); void KickSprite(); void KickDummy(); inline void SetContextTarget(int context); // use to update the d3d state void SetTexVariables(int context); void SetAlphaVariables(const alphaInfo& ainfo); void ResetAlphaVariables(); __forceinline void SetAlphaTestInt(pixTest curtest); __forceinline void RenderAlphaTest(const VB& curvb); __forceinline void RenderStencil(const VB& curvb, DWORD dwUsingSpecialTesting); __forceinline void ProcessStencil(const VB& curvb); __forceinline void RenderFBA(const VB& curvb); __forceinline void ProcessFBA(const VB& curvb); void ResolveInRange(int start, int end); void ExtWrite(); inline LPD3DTEX CreateInterlaceTex(int width) { if( width == s_nInterlaceTexWidth && s_ptexInterlace != NULL ) return s_ptexInterlace; SAFE_RELEASE(s_ptexInterlace); s_nInterlaceTexWidth = width; HRESULT hr; V(pd3dDevice->CreateTexture(width, 1, 1, 0, D3DFMT_A8R8G8B8, D3DPOOL_MANAGED, &s_ptexInterlace, NULL)); D3DLOCKED_RECT lock; s_ptexInterlace->LockRect(0, &lock, NULL, 0); for(int i = 0; i < width; ++i) ((u32*)lock.pBits)[i] = (i&1) ? 
0xffffffff : 0; s_ptexInterlace->UnlockRect(0); return s_ptexInterlace; } DrawFn drawfn[8] = { KickDummy, KickDummy, KickDummy, KickDummy, KickDummy, KickDummy, KickDummy, KickDummy }; }; // end namespace /////////////////// // Context State // /////////////////// ZeroGS::VB::VB() { memset(this, 0, sizeof(VB)); tex0.tw = 1; tex0.th = 1; } ZeroGS::VB::~VB() { Destroy(); } void ZeroGS::VB::Destroy() { Unlock(); SAFE_RELEASE(pvb); prndr = NULL; pdepth = NULL; } void ZeroGS::VB::Lock() { assert(pvb != NULL); if( pbuf == NULL ) { if( dwCurOff+POINT_BUFFERFLUSH > POINT_BUFFERSIZE ) dwCurOff = 0; pvb->Lock(dwCurOff*sizeof(VertexGPU), sizeof(VertexGPU)*POINT_BUFFERFLUSH, (void**)&pbuf, dwCurOff ? D3DLOCK_NOOVERWRITE|D3DLOCK_NOSYSLOCK : D3DLOCK_DISCARD|D3DLOCK_NOSYSLOCK); dwCount = 0; assert( pbuf != NULL ); } } bool ZeroGS::VB::CheckPrim() { Lock(); if( (PRIMMASK & prim->_val) != (PRIMMASK & curprim._val) || primtype[prim->prim] != primtype[curprim.prim] ) return dwCount > 0; return false; } // upper bound on max possible height #define GET_MAXHEIGHT(fbp, fbw, psm) ((((0x00100000-64*(fbp))/(fbw))&~0x1f)<<((psm&2)?1:0)) #include static int maxmin = 608; //static set s_setFBP[2]; // previous frame/zbuf pointers for the last 2 frames //static int s_nCurFBPSet = 0; //static map s_mapFrameHeights[2]; //static int s_nCurFrameMap = 0; // a lot of times, target is too big and overwrites the texture using, if tbp != 0, use it to bound void ZeroGS::VB::CheckFrame(int tbp) { static int bChanged; if( bNeedZCheck ) { PRIM_LOG("zbuf_%d: zbp=0x%x psm=0x%x, zmsk=%d\n", ictx, zbuf.zbp, zbuf.psm, zbuf.zmsk); //zbuf = *zb; } // invalid bpp if( m_Blocks[gsfb.psm].bpp == 0 ) { ERROR_LOG("CheckFrame invalid bpp %d\n", gsfb.psm); return; } bChanged = 0; if( gsfb.fbw <= 0 ) { return; } if( bNeedFrameCheck ) { int maxpos = 0x00100000; // important to set before calling GetTarg bNeedFrameCheck = 0; bNeedZCheck = 0; // add constraints of other targets if( gsfb.fbw > 0 ) { maxpos = 
0x00100000-64*gsfb.fbp; // make sure texture is far away from tbp if( gsfb.fbp < tbp && gsfb.fbp + 0x2000 < tbp) { maxpos = min(64*(tbp-gsfb.fbp), maxpos); } if( prndr != NULL ) { // offroad uses 0x80 fbp which messes up targets if( gsfb.fbp + 0x80 < frame.fbp ) { // special case when double buffering (hamsterball) maxpos = min(64*(frame.fbp-gsfb.fbp), maxpos); } } if( zbuf.zbp < tbp && !zbuf.zmsk ) { maxpos = min((tbp-zbuf.zbp)*((zbuf.psm&2)?128:64), maxpos); } // old caching method if( gsfb.fbp < zbuf.zbp && !zbuf.zmsk ) { // zmsk necessary for KH movie int temp = 64*(zbuf.zbp-gsfb.fbp);//min( (0x00100000-64*zbuf.zbp) , 64*(zbuf.zbp-gsfb.fbp) ); maxpos = min(temp, maxpos); } maxpos /= gsfb.fbw; if( gsfb.psm & 2 ) maxpos *= 2; maxpos = min(gsfb.fbh, maxpos); maxpos = min(maxmin, maxpos); //? atelier iris crashes without it if( maxpos > 256 ) maxpos &= ~0x1f; } else { ERROR_LOG("render target null, ignoring\n"); //prndr = NULL; //pdepth = NULL; return; } gsfb.psm &= 0xf; // shadow tower if( prndr != NULL ) { // render target if( prndr->psm != gsfb.psm ) { // behavior for dest alpha varies ResetAlphaVariables(); } } int fbh = (scissor.y1>>MINMAX_SHIFT)+1; if( fbh > 2 && (fbh&1) ) fbh -= 1; if( !(gsfb.psm&2) || !(g_GameSettings&GAME_FULL16BITRES) ) { fbh = min(fbh, maxpos); } frame = gsfb; if (frame.fbw > 1024) frame.fbw = 1024; // if( fbh > 256 && (fbh % m_Blocks[gsfb.psm].height) <= 2 ) { // // dragon ball z // fbh -= fbh%m_Blocks[gsfb.psm].height; // } if( !(frame.psm&2) || !(g_GameSettings&GAME_FULL16BITRES) ) frame.fbh = fbh; if( !(frame.psm&2) ) {//|| !(g_GameSettings&GAME_FULL16BITRES) ) { if( frame.fbh >= 512 ) { // neopets hack maxmin = min(maxmin, frame.fbh); frame.fbh = maxmin; } } // mgs3 hack to get proper resolution, targets after 0x2000 are usually feedback if( g_MaxRenderedHeight >= 0xe0 && frame.fbp >= 0x2000 ) { int considerheight = (g_MaxRenderedHeight/8+31)&~31; if( frame.fbh > considerheight ) frame.fbh = considerheight; else if( frame.fbh <= 32 
) frame.fbh = considerheight; if( frame.fbh == considerheight ) { // stops bad resolves (mgs3) if( !curprim.abe && (!test.ate || test.atst == 0) ) s_nResolved |= 0x100; } } // ffxii hack to stop resolving if( !(frame.psm&2) || !(g_GameSettings&GAME_FULL16BITRES) ) { if( frame.fbp >= 0x3000 && fbh >= 0x1a0 ) { int endfbp = frame.fbp + frame.fbw*fbh/((gsfb.psm&2)?128:64); // see if there is a previous render target in the way, reduce for(CRenderTargetMngr::MAPTARGETS::iterator itnew = s_RTs.mapTargets.begin(); itnew != s_RTs.mapTargets.end(); ++itnew) { if( itnew->second->fbp > frame.fbp && endfbp > itnew->second->fbp ) { endfbp = itnew->second->fbp; } } frame.fbh = (endfbp-frame.fbp)*((gsfb.psm&2)?128:64)/frame.fbw; } } CRenderTarget* pprevrndr = prndr; CDepthTarget* pprevdepth = pdepth; // reset so that Resolve doesn't call Flush prndr = NULL; pdepth = NULL; CRenderTarget* pnewtarg = s_RTs.GetTarg(frame, 0, maxmin); assert( pnewtarg != NULL ); // pnewtarg->fbh >= 0x1c0 needed for ffx if( pnewtarg->fbh >= 0x1c0 && pnewtarg->fbh > frame.fbh && zbuf.zbp < tbp && !zbuf.zmsk ) { // check if zbuf is in the way of the texture (suikoden5) int maxallowedfbh = (tbp-zbuf.zbp)*((zbuf.psm&2)?128:64) / gsfb.fbw; if( gsfb.psm & 2 ) maxallowedfbh *= 2; if( pnewtarg->fbh > maxallowedfbh+32 ) { // +32 needed for ffx2 // destroy and recreate s_RTs.DestroyAll(0, 0x100, pnewtarg->fbw); pnewtarg = s_RTs.GetTarg(frame, 0, maxmin); assert( pnewtarg != NULL ); } } PRIM_LOG("frame_%d: fbp=0x%x fbw=%d fbh=%d(%d) psm=0x%x fbm=0x%x\n", ictx, gsfb.fbp, gsfb.fbw, gsfb.fbh, pnewtarg->fbh, gsfb.psm, gsfb.fbm); if( (pprevrndr != pnewtarg) || (prndr != NULL && (prndr->status & CRenderTarget::TS_NeedUpdate)) ) bChanged = 1; prndr = pnewtarg; // update z frameInfo tempfb; tempfb.fbw = prndr->fbw; tempfb.fbp = zbuf.zbp; tempfb.psm = zbuf.psm; tempfb.fbh = prndr->targheight; if( zbuf.psm == 0x31 ) tempfb.fbm = 0xff000000; else tempfb.fbm = 0; // check if there is a target that exactly aligns with zbuf 
(zbuf can be cleared this way, gunbird 2) //u32 key = zbuf.zbp|(frame.fbw<<16); //CRenderTargetMngr::MAPTARGETS::iterator it = s_RTs.mapTargets.find(key); // if( it != s_RTs.mapTargets.end() ) { //#ifdef PCSX2_DEBUG // DEBUG_LOG("zbuf resolve\n"); //#endif // if( it->second->status & CRenderTarget::TS_Resolved ) // it->second->Resolve(); // } CDepthTarget* pnewdepth = (CDepthTarget*)s_DepthRTs.GetTarg(tempfb, CRenderTargetMngr::TO_DepthBuffer | CRenderTargetMngr::TO_StrictHeight|(zbuf.zmsk?CRenderTargetMngr::TO_Virtual:0), prndr->targheight);//GET_MAXHEIGHT(zbuf.zbp, gsfb.fbw, 0)); assert( pnewdepth != NULL && prndr != NULL ); assert( pnewdepth->fbh == prndr->targheight ); if( (pprevdepth != pnewdepth) || (pdepth != NULL && (pdepth->status & CRenderTarget::TS_NeedUpdate)) ) bChanged |= 2; pdepth = pnewdepth; if( prndr->status & CRenderTarget::TS_NeedConvert32) { if( pdepth->pdepth != NULL ) pd3dDevice->SetDepthStencilSurface(pdepth->pdepth); prndr->fbh *= 2; prndr->targheight *= 2; prndr->ConvertTo32(); prndr->status &= ~CRenderTarget::TS_NeedConvert32; } else if( prndr->status & CRenderTarget::TS_NeedConvert16 ) { if( pdepth->pdepth != NULL ) pd3dDevice->SetDepthStencilSurface(pdepth->pdepth); prndr->fbh /= 2; prndr->targheight /= 2; prndr->ConvertTo16(); prndr->status &= ~CRenderTarget::TS_NeedConvert16; } } else if( bNeedZCheck ) { bNeedZCheck = 0; CDepthTarget* pprevdepth = pdepth; pdepth = NULL; if( prndr != NULL && gsfb.fbw > 0 ) { // just z changed frameInfo f; f.fbp = zbuf.zbp; f.fbw = prndr->fbw; f.fbh = prndr->fbh; f.psm = zbuf.psm; if( zbuf.psm == 0x31 ) f.fbm = 0xff000000; else f.fbm = 0; CDepthTarget* pnewdepth = (CDepthTarget*)s_DepthRTs.GetTarg(f, CRenderTargetMngr::TO_DepthBuffer|CRenderTargetMngr::TO_StrictHeight| (zbuf.zmsk?CRenderTargetMngr::TO_Virtual:0), prndr->fbh);//GET_MAXHEIGHT(zbuf.zbp, gsfb.fbw, 0)); assert( pnewdepth != NULL && prndr != NULL ); assert( pnewdepth->fbh == prndr->fbh ); if( (pprevdepth != pnewdepth) || (pdepth != NULL && 
(pdepth->status & CRenderTarget::TS_NeedUpdate)) ) bChanged = 2; pdepth = pnewdepth; } } s_nResolved &= 0xff; // restore if( prndr != NULL ) SetContextTarget(ictx); //if( prndr != NULL && ictx == icurctx) //else bVarsSetTarg = 0; // if( prndr != NULL && bChanged ) { // if( ictx == icurctx ) SetContextTarget(icurctx); // else // bVarsSetTarg = 0; // } } void ZeroGS::VB::FlushTexData() { assert( bNeedTexCheck ); bNeedTexCheck = 0; u32 psm = (uNextTex0Data[0] >> 20) & 0x3f; if( psm == 9 ) psm = 1; // hmm..., ffx intro menu // don't update unless necessary if( uCurTex0Data[0] == uNextTex0Data[0] && (uCurTex0Data[1]&0x1f) == (uNextTex0Data[1]&0x1f) ) { if( PSMT_ISCLUT(psm) ) { // have to write the CLUT again if changed if( (uCurTex0Data[1]&0x1fffffe0) == (uNextTex0Data[1]&0x1fffffe0) ) { if( uNextTex0Data[1]&0xe0000000 ) { //ZeroGS::Flush(ictx); ZeroGS::texClutWrite(ictx); // invalidate to make sure target didn't change! bVarsTexSync = FALSE; } return; } if( (uNextTex0Data[1]&0xe0000000) == 0 ) { if( (uCurTex0Data[1]&0x1ff10000) != (uNextTex0Data[1]&0x1ff10000) ) ZeroGS::Flush(ictx); // clut isn't going to be loaded so can ignore, but at least update CSA and CPSM! 
uCurTex0Data[1] = (uCurTex0Data[1]&0xe087ffff)|(uNextTex0Data[1]&0x1f780000); if( tex0.cpsm <= 1 ) tex0.csa = (uNextTex0Data[1] >> 24) & 0xf; else tex0.csa = (uNextTex0Data[1] >> 24) & 0x1f; tex0.cpsm = (uNextTex0Data[1] >> 19) & 0xe; ZeroGS::texClutWrite(ictx); bVarsTexSync = FALSE; return; } // fall through } else { //bVarsTexSync = FALSE; return; } } ZeroGS::Flush(ictx); bVarsTexSync = FALSE; bTexConstsSync = FALSE; uCurTex0Data[0] = uNextTex0Data[0]; uCurTex0Data[1] = uNextTex0Data[1]; tex0.tbp0 = (uNextTex0Data[0] & 0x3fff); tex0.tbw = ((uNextTex0Data[0] >> 14) & 0x3f) * 64; tex0.psm = psm; tex0.tw = (uNextTex0Data[0] >> 26) & 0xf; if (tex0.tw > 10) tex0.tw = 10; tex0.tw = 1<> 30) & 0x3) | ((uNextTex0Data[1] & 0x3) << 2); if (tex0.th > 10) tex0.th = 10; tex0.th = 1<> 2) & 0x1; tex0.tfx = (uNextTex0Data[1] >> 3) & 0x3; ZeroGS::fiTexWidth[ictx] = (1/16.0f)/ tex0.tw; ZeroGS::fiTexHeight[ictx] = (1/16.0f) / tex0.th; if (tex0.tbw == 0) tex0.tbw = 64; if( PSMT_ISCLUT(psm) ) { tex0.cbp = ((uNextTex0Data[1] >> 5) & 0x3fff); tex0.cpsm = (uNextTex0Data[1] >> 19) & 0xe; tex0.csm = (uNextTex0Data[1] >> 23) & 0x1; if( tex0.cpsm <= 1 ) tex0.csa = (uNextTex0Data[1] >> 24) & 0xf; else tex0.csa = (uNextTex0Data[1] >> 24) & 0x1f; tex0.cld = (uNextTex0Data[1] >> 29) & 0x7; ZeroGS::texClutWrite(ictx); } } // does one time only initializing/destruction class ZeroGSInit { public: ZeroGSInit() { // clear g_pbyGSMemory = (u8*)_aligned_malloc(0x00410000, 1024); // leave some room for out of range accesses (saves on the checks) memset(g_pbyGSMemory, 0, 0x00410000); g_pbyGSClut = (u8*)_aligned_malloc(256*8, 1024); // need 512 alignment! 
memset(g_pbyGSClut, 0, 256*8); } ~ZeroGSInit() { _aligned_free(g_pbyGSMemory); g_pbyGSMemory = NULL; _aligned_free(g_pbyGSClut); g_pbyGSClut = NULL; } }; static ZeroGSInit s_ZeroGSInit; HRESULT ZeroGS::Create(LONG _width, LONG _height) { Destroy(1); GSStateReset(); width = _width; height = _height; fiRendWidth = 1.0f / width; fiRendHeight = 1.0f / height; HRESULT hr; if( NULL == (pD3D = Direct3DCreate9(D3D_SDK_VERSION)) ) { ERROR_LOG(_T("Failed to create the direct3d interface.")); return E_FAIL; } D3DDISPLAYMODE d3ddm; if( FAILED( hr = pD3D->GetAdapterDisplayMode( D3DADAPTER_DEFAULT, &d3ddm ) ) ) { ERROR_LOG(_T("Error geting default adapter.")); return hr; } if( conf.options & GSOPTION_FULLSCREEN ) { // choose best mode // RECT rcdesktop; // GetWindowRect(GetDesktopWindow(), &rcdesktop); // width = rcdesktop.right - rcdesktop.left; // height = rcdesktop.bottom - rcdesktop.top; // width = height = 0; // D3DDISPLAYMODE d3ddmtemp; // // int modes = pD3D->GetAdapterModeCount(D3DADAPTER_DEFAULT, d3ddm.Format); // for(int i= 0; i < modes; ++i) { // pD3D->EnumAdapterModes(D3DADAPTER_DEFAULT, d3ddm.Format, i, &d3ddmtemp); // // if( abs(1024-(int)d3ddmtemp.Width) <= abs(1280-width) && abs(768-(int)d3ddmtemp.Height) <= abs(1024-height) ) { // width = d3ddmtemp.Width; // height = d3ddmtemp.Height; // } // } } else { // change to default resolution ChangeDisplaySettings(NULL, 0); } // Set up the structure used to create the D3DDevice. Since we are now // using more complex geometry, we will create a device with a zbuffer. ZeroMemory( &d3dpp, sizeof(d3dpp) ); d3dpp.Windowed = !(conf.options & GSOPTION_FULLSCREEN); d3dpp.hDeviceWindow = GShwnd; d3dpp.SwapEffect = (conf.options & GSOPTION_FULLSCREEN) ? 
D3DSWAPEFFECT_FLIP : D3DSWAPEFFECT_DISCARD; d3dpp.BackBufferFormat = D3DFMT_A8R8G8B8; d3dpp.EnableAutoDepthStencil = TRUE; d3dpp.AutoDepthStencilFormat = D3DFMT_D24S8; d3dpp.BackBufferWidth = width; d3dpp.BackBufferHeight = height; d3dpp.PresentationInterval = D3DPRESENT_INTERVAL_IMMEDIATE;//(conf.options & GSOPTION_FULLSCREEN) ? D3DPRESENT_INTERVAL_DEFAULT : D3DPRESENT_INTERVAL_IMMEDIATE; d3dpp.Flags = DEBUG_PS2 ? D3DPRESENTFLAG_LOCKABLE_BACKBUFFER : 0; s_nFullscreen = (conf.options & GSOPTION_FULLSCREEN) ? 1 : 0; // Create the D3DDevice UINT adapter = D3DADAPTER_DEFAULT; D3DDEVTYPE devtype = !DEBUG_PS2 ? D3DDEVTYPE_HAL : D3DDEVTYPE_REF; #ifndef PCSX2_DEBUG DWORD hwoptions = D3DCREATE_HARDWARE_VERTEXPROCESSING|D3DCREATE_PUREDEVICE; #else DWORD hwoptions = D3DCREATE_HARDWARE_VERTEXPROCESSING; #endif #ifndef RELEASE_TO_PUBLIC for(UINT i = 0; i < pD3D->GetAdapterCount(); ++i) { D3DADAPTER_IDENTIFIER9 id; HRESULT hr = pD3D->GetAdapterIdentifier(i, 0, &id); if( strcmp(id.Description, "NVIDIA NVPerfHUD") == 0 ) { DEBUG_LOG("Using %s adapter\n", id.Description); adapter = i; devtype = D3DDEVTYPE_REF; break; } } #endif if( FAILED( hr = pD3D->CreateDevice( adapter, devtype, GShwnd, !DEBUG_PS2 ? 
hwoptions : D3DCREATE_SOFTWARE_VERTEXPROCESSING, &d3dpp, &pd3dDevice ) ) ) { ERROR_LOG(_T("Failed to create hardware device, creating software.\n")); if( FAILED( hr = pD3D->CreateDevice( D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, GShwnd, D3DCREATE_SOFTWARE_VERTEXPROCESSING, &d3dpp, &pd3dDevice ) ) ) { ERROR_LOG(_T("Failed to create software device, switching to reference rasterizer.\n")); if( FAILED( hr = pD3D->CreateDevice( D3DADAPTER_DEFAULT, D3DDEVTYPE_REF, GShwnd, D3DCREATE_SOFTWARE_VERTEXPROCESSING, &d3dpp, &pd3dDevice ) ) ) return hr; } } // get caps and check if gfx card is ok D3DCAPS9 caps; pd3dDevice->GetDeviceCaps(&caps); if( caps.VertexShaderVersion < D3DVS_VERSION(2,0) ) { ERROR_LOG("*********\nGS ERROR: Need at least vs2.0\n*********\n"); Destroy(1); return E_FAIL; } conf.mrtdepth = 1; if( caps.NumSimultaneousRTs == 1 ) { ERROR_LOG("*********\nGS WARNING: Need at least 2 simultaneous render targets. Some zbuffer effects will look wrong\n*********\n"); conf.mrtdepth = 0; } if( !(caps.PrimitiveMiscCaps & D3DPMISCCAPS_SEPARATEALPHABLEND) ) { ERROR_LOG("*********\nGS ERROR: Need separate alpha blending! Some effects will look bad\n*********\n"); } if( !(caps.PrimitiveMiscCaps & D3DPMISCCAPS_INDEPENDENTWRITEMASKS) ) { ERROR_LOG("******\nGS WARNING: Need independent write masks! 
Some z buffer effects might look bad\n*********\n"); bIndepWriteMasks = 0; } if( !(caps.PrimitiveMiscCaps & D3DPMISCCAPS_MRTPOSTPIXELSHADERBLENDING) ) { ERROR_LOG("******\nGS WARNING: Need MRT Post Pixel Shader Blending for some effects\n*********\n"); } hr = pD3D->CheckDeviceFormat( D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, d3ddm.Format, D3DUSAGE_RENDERTARGET|D3DUSAGE_QUERY_POSTPIXELSHADER_BLENDING, D3DRTYPE_TEXTURE, g_RenderFormat); if( g_GameSettings & GAME_32BITTARGS ) { g_RenderFormat = D3DFMT_A8R8G8B8; ERROR_LOG("Setting 32 bit render target\n"); } else if( FAILED(hr) ) { ERROR_LOG("******\nGS ERROR: Device doesn't support alpha blending for 16bit floating point targets.\nQuality will reduce.\n*********\n"); g_RenderFormat = D3DFMT_A8R8G8B8; } // hr = pD3D->CheckDeviceFormat( D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, d3ddm.Format, 0, D3DRTYPE_TEXTURE, D3DFMT_G32R32F); // // if( FAILED(hr) ) { // ERROR_LOG("******\nGS ERROR: Device doesn't support G32R32F textures.\nTextures will look bad.\n*********\n"); // } g_MaxTexWidth = caps.MaxTextureWidth; g_MaxTexHeight = caps.MaxTextureHeight; GPU_TEXWIDTH = caps.MaxTextureWidth/8; g_fiGPU_TEXWIDTH = 1.0f / GPU_TEXWIDTH; //g_RenderFormat = D3DFMT_A8R8G8B8; pd3dDevice->GetRenderTarget(0, &psurfOrgTarg); pd3dDevice->GetDepthStencilSurface(&psurfOrgDepth); SETRS(D3DRS_ZENABLE, TRUE); SETRS(D3DRS_LIGHTING, FALSE); SETRS(D3DRS_SPECULARENABLE, FALSE); V_RETURN( D3DXCreateFont( pd3dDevice, 15, 0, FW_BOLD, 1, FALSE, DEFAULT_CHARSET, OUT_DEFAULT_PRECIS, DEFAULT_QUALITY, DEFAULT_PITCH | FF_DONTCARE, "Arial", &pFont ) ); // create the vertex decl const D3DVERTEXELEMENT9 Decl[] = { { 0, 0, D3DDECLTYPE_SHORT4N, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION, 0 }, { 0, 8, D3DDECLTYPE_D3DCOLOR, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_COLOR, 0 }, { 0, 12, D3DDECLTYPE_UBYTE4, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 0 }, { 0, 16, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 1 }, D3DDECL_END() }; 
V_RETURN(pd3dDevice->CreateVertexDeclaration(Decl, &pdecl)); #ifdef RELEASE_TO_PUBLIC HRSRC hShaderSrc = FindResource(hInst, MAKEINTRESOURCE(IDR_SHADERS), RT_RCDATA); assert( hShaderSrc != NULL ); HGLOBAL hShaderGlob = LoadResource(hInst, hShaderSrc); assert( hShaderGlob != NULL ); s_lpShaderResources = (BYTE*)LockResource(hShaderGlob); #endif // load the effect ERROR_LOG("Creating effects\n"); V_RETURN(LoadEffects()); g_bDisplayMsg = 0; if( caps.VertexShaderVersion >= D3DVS_VERSION(3,0) && caps.PixelShaderVersion >= D3DPS_VERSION(2,0) ) g_nPixelShaderVer = SHADER_30; else if( caps.PixelShaderVersion == D3DPS_VERSION(2,0) ) g_nPixelShaderVer = SHADER_20; else g_nPixelShaderVer = SHADER_20a; #ifdef RELEASE_TO_PUBLIC // create a sample shader clampInfo temp; memset(&temp, 0, sizeof(temp)); temp.wms = 3; temp.wmt = 3; if( g_nPixelShaderVer != SHADER_30 ) { // test more if( LoadShadeEffect(0, 1, 1, 1, 1, temp, 0) == NULL ) { g_nPixelShaderVer = SHADER_20b; if( LoadShadeEffect(0, 1, 1, 1, 1, temp, 0) == NULL ) { g_nPixelShaderVer = SHADER_20; if( LoadShadeEffect(0, 0, 1, 1, 0, temp, 0) == NULL ) { ERROR_LOG("*********\nGS ERROR: Need at least ps2.0 (ps2.0a+ recommended)\n*********\n"); Destroy(1); return E_FAIL; } } } } #endif // set global shader constants pd3dDevice->SetPixelShaderConstantF(27, DXVEC4(0.5f, (g_GameSettings&GAME_EXACTCOLOR)?0.9f/256.0f:0.5f/256.0f, 0,1/255.0f), 1); // g_fExactColor pd3dDevice->SetPixelShaderConstantF(28, DXVEC4(-0.7f, -0.65f, 0.9f,0), 1); // g_fBilinear pd3dDevice->SetPixelShaderConstantF(29, DXVEC4(1.0f/256.0f, 1.0004f, 1, 0.5f), 1); // g_fZBias pd3dDevice->SetPixelShaderConstantF(30, DXVEC4(0,1, 0.001f, 0.5f), 1); // g_fc0 pd3dDevice->SetPixelShaderConstantF(31, DXVEC4(1/1024.0f, 0.2f/1024.0f, 1/128.0f, 1/512.0f), 1); // g_fMult pd3dDevice->SetVertexShaderConstantF(29, DXVEC4(1.0f/256.0f, 1.0004f, 1, 0.5f), 1); // g_fZBias pd3dDevice->SetVertexShaderConstantF(30, DXVEC4(0,1, 0.001f, 0.5f), 1); // g_fc0 
pd3dDevice->SetVertexShaderConstantF(31, DXVEC4(0.5f, -0.5f, 0.5f, 0.5f + 0.4f/416.0f), 1); // g_fBitBltTrans g_bDisplayMsg = 1; if( g_nPixelShaderVer == SHADER_20 ) conf.bilinear = 0; ERROR_LOG("Creating extra effects\n"); V_RETURN(LoadExtraEffects()); ERROR_LOG("GS Using pixel shaders %s\n", g_pShaders[g_nPixelShaderVer]); pd3dDevice->Clear(0, NULL, D3DCLEAR_TARGET|D3DCLEAR_STENCIL|D3DCLEAR_ZBUFFER, 0, 1, 0); // init draw fns drawfn[0] = KickPoint; drawfn[1] = KickLine; drawfn[2] = KickLine; drawfn[3] = KickTriangle; drawfn[4] = KickTriangle; drawfn[5] = KickTriangleFan; drawfn[6] = KickSprite; drawfn[7] = KickDummy; SetAA(conf.aa); GSsetGameCRC(g_LastCRC, g_GameSettings); return S_OK; } void ZeroGS::Destroy(BOOL bD3D) { DeleteDeviceObjects(); vb[0].Destroy(); vb[1].Destroy(); for(int i = 0; i < ArraySize(pvs); ++i) { SAFE_RELEASE(pvs[i]); } for(int i = 0; i < ArraySize(ppsRegular); ++i) { SAFE_RELEASE(ppsRegular[i]); } for(int i = 0; i < ArraySize(ppsTexture); ++i) { SAFE_RELEASE(ppsTexture[i]); } SAFE_RELEASE(pvsBitBlt); SAFE_RELEASE(pvsBitBlt30); SAFE_RELEASE(ppsBitBlt[0]); SAFE_RELEASE(ppsBitBlt[1]); SAFE_RELEASE(ppsBitBltDepth[0]); SAFE_RELEASE(ppsBitBltDepth[1]); SAFE_RELEASE(ppsBitBltDepthTex[0]); SAFE_RELEASE(ppsBitBltDepthTex[1]); SAFE_RELEASE(ppsCRTCTarg[0]); SAFE_RELEASE(ppsCRTCTarg[1]); SAFE_RELEASE(ppsCRTC[0]); SAFE_RELEASE(ppsCRTC[1]); SAFE_RELEASE(ppsCRTC24[0]); SAFE_RELEASE(ppsCRTC24[1]); SAFE_RELEASE(ppsOne); SAFE_RELEASE(pdecl); SAFE_RELEASE(pFont); SAFE_RELEASE(psurfOrgTarg); SAFE_RELEASE(psurfOrgDepth); if( bD3D ) { SAFE_RELEASE(pd3dDevice); SAFE_RELEASE(pD3D); } } void ZeroGS::GSStateReset() { icurctx = -1; for(int i = 0; i < 2; ++i) { LPD3DVB pvb = vb[i].pvb; if( pvb != NULL ) pvb->AddRef(); vb[i].Destroy(); memset(&vb[i], 0, sizeof(VB)); vb[i].tex0.tw = 1; vb[i].tex0.th = 1; vb[i].pvb = pvb; vb[i].scissor.x1 = 639; vb[i].scissor.y1 = 479; vb[i].tex0.tbw = 64; } s_RangeMngr.Clear(); g_MemTargs.Destroy(); s_RTs.Destroy(); 
s_DepthRTs.Destroy(); s_BitwiseTextures.Destroy(); vb[0].ictx = 0; vb[1].ictx = 1; s_bAlphaSet = FALSE; } void ZeroGS::AddMessage(const char* pstr, DWORD ms) { listMsgs.push_back(MESSAGE(pstr, timeGetTime()+ms)); } void ZeroGS::ChangeWindowSize(int nNewWidth, int nNewHeight) { width = nNewWidth > 16 ? nNewWidth : 16; height = nNewHeight > 16 ? nNewHeight : 16; if( !(conf.options & GSOPTION_FULLSCREEN) ) { conf.width = nNewWidth; conf.height = nNewHeight; //SaveConfig(); } } void ZeroGS::SetChangeDeviceSize(int nNewWidth, int nNewHeight) { s_nNewWidth = nNewWidth; s_nNewHeight = nNewHeight; if( !(conf.options & GSOPTION_FULLSCREEN) ) { conf.width = nNewWidth; conf.height = nNewHeight; //SaveConfig(); } } void ZeroGS::Reset() { s_RTs.ResolveAll(); s_DepthRTs.ResolveAll(); vb[0].Unlock(); vb[1].Unlock(); memset(s_nResolveCounts, 0, sizeof(s_nResolveCounts)); s_nLastResolveReset = 0; icurctx = -1; GSStateReset(); Destroy(0); drawfn[0] = KickDummy; drawfn[1] = KickDummy; drawfn[2] = KickDummy; drawfn[3] = KickDummy; drawfn[4] = KickDummy; drawfn[5] = KickDummy; drawfn[6] = KickDummy; drawfn[7] = KickDummy; } void ZeroGS::ChangeDeviceSize(int nNewWidth, int nNewHeight) { int oldscreen = s_nFullscreen; int oldwidth = width, oldheight = height; if( FAILED(Create(nNewWidth&~7, nNewHeight&~7)) ) { DEBUG_LOG("Failed to recreate, changing to old\n"); if( FAILED(Create(oldwidth, oldheight)) ) { MessageBox(NULL, "failed to create dev, exiting...\n", "Error", MB_OK); exit(0); } } if( FAILED(InitDeviceObjects()) ) { MessageBox(NULL, "failed to init dev objs, exiting...\n", "Error", MB_OK); exit(0); } for(int i = 0; i < 2; ++i) { vb[i].bNeedFrameCheck = vb[i].bNeedZCheck = 1; vb[i].CheckFrame(0); } if( oldscreen && !(conf.options & GSOPTION_FULLSCREEN) ) { // if transitioning from full screen RECT rc; rc.left = 0; rc.top = 0; rc.right = conf.width; rc.bottom = conf.height; AdjustWindowRect(&rc, conf.winstyle, FALSE); RECT rcdesktop; GetWindowRect(GetDesktopWindow(), &rcdesktop); 
SetWindowLong( GShwnd, GWL_STYLE, conf.winstyle ); SetWindowPos(GShwnd, HWND_TOP, ((rcdesktop.right-rcdesktop.left)-(rc.right-rc.left))/2, ((rcdesktop.bottom-rcdesktop.top)-(rc.bottom-rc.top))/2, rc.right-rc.left, rc.bottom-rc.top, SWP_SHOWWINDOW); UpdateWindow(GShwnd); } vb[0].Lock(); vb[1].Lock(); assert( vb[0].pbuf != NULL && vb[1].pbuf != NULL ); } void ZeroGS::SetAA(int mode) { float f; // need to flush all targets s_RTs.ResolveAll(); s_RTs.Destroy(); s_DepthRTs.ResolveAll(); s_DepthRTs.Destroy(); s_AAx = s_AAy = 0; if( mode > 0 ) { s_AAx = (mode+1) / 2; s_AAy = mode / 2; } memset(s_nResolveCounts, 0, sizeof(s_nResolveCounts)); s_nLastResolveReset = 0; vb[0].prndr = NULL; vb[0].pdepth = NULL; vb[0].bNeedFrameCheck = 1; vb[0].bNeedZCheck = 1; vb[1].prndr = NULL; vb[1].pdepth = NULL; vb[1].bNeedFrameCheck = 1; vb[1].bNeedZCheck = 1; f = mode > 0 ? 2.0f : 1.0f; SETRS(D3DRS_POINTSIZE, FtoDW(f)); } #ifdef RELEASE_TO_PUBLIC #define LOAD_VS(Index, ptr) { \ assert( mapShaderResources.find(Index) != mapShaderResources.end() ); \ header = mapShaderResources[Index]; \ assert( (header) != NULL && (header)->index == (Index) ); \ hr = pd3dDevice->CreateVertexShader((DWORD*)(s_lpShaderResources + (header)->offset), &(ptr)); \ if( FAILED(hr) || ptr == NULL ) { \ DEBUG_LOG("errors 0x%x for %d, failed.. try updating your drivers or dx\n", hr, Index); \ return E_FAIL; \ } \ } \ #define LOAD_PS(index, ptr) { \ assert( mapShaderResources.find(index) != mapShaderResources.end() ); \ header = mapShaderResources[index]; \ hr = pd3dDevice->CreatePixelShader((DWORD*)(s_lpShaderResources + (header)->offset), &(ptr)); \ if( FAILED(hr) || ptr == NULL ) { \ DEBUG_LOG("errors 0x%x for %s, failed.. 
try updating your drivers or dx\n", hr, index); \ return E_FAIL; \ } \ } \ HRESULT ZeroGS::LoadEffects() { assert( s_lpShaderResources != NULL ); // process the header DWORD num = *(DWORD*)s_lpShaderResources; SHADERHEADER* header = (SHADERHEADER*)((BYTE*)s_lpShaderResources + 4); mapShaderResources.clear(); while(num-- > 0 ) { mapShaderResources[header->index] = header; ++header; } // clear the textures for(int i = 0; i < ArraySize(ppsTexture); ++i) { SAFE_RELEASE(ppsTexture[i]); } memset(ppsTexture, 0, sizeof(ppsTexture)); return S_OK; } // called HRESULT ZeroGS::LoadExtraEffects() { HRESULT hr; SHADERHEADER* header; DWORD mask = g_nPixelShaderVer == SHADER_30 ? SH_30 : 0; const int vsshaders[4] = { SH_REGULARVS, SH_TEXTUREVS, SH_REGULARFOGVS, SH_TEXTUREFOGVS }; for(int i = 0; i < 4; ++i) { LOAD_VS(vsshaders[i]|mask, pvs[2*i]); LOAD_VS(vsshaders[i]|mask|SH_CONTEXT1, pvs[2*i+1]); LOAD_VS(vsshaders[i]|mask|SH_WRITEDEPTH, pvs[2*i+8]); LOAD_VS(vsshaders[i]|mask|SH_WRITEDEPTH|SH_CONTEXT1, pvs[2*i+8+1]); } LOAD_VS(SH_BITBLTVS, pvsBitBlt); //LOAD_VS(SH_BITBLTVS|SH_30, pvsBitBlt30); LOAD_PS(SH_REGULARPS|mask, ppsRegular[0]); LOAD_PS(SH_REGULARFOGPS|mask, ppsRegular[1]); LOAD_PS(SH_REGULARPS|SH_WRITEDEPTH|mask, ppsRegular[2]); LOAD_PS(SH_REGULARFOGPS|SH_WRITEDEPTH|mask, ppsRegular[3]); LOAD_PS(SH_BITBLTPS, ppsBitBlt[0]); LOAD_PS(SH_BITBLTAAPS, ppsBitBlt[0]); LOAD_PS(SH_BITBLTDEPTHPS, ppsBitBltDepth[0]); LOAD_PS(SH_BITBLTDEPTHMRTPS, ppsBitBltDepth[1]); LOAD_PS(SH_BITBLTDEPTHTEXPS, ppsBitBltDepthTex[0]); LOAD_PS(SH_BITBLTDEPTHTEXMRTPS, ppsBitBltDepthTex[1]); LOAD_PS(SH_CRTCTARGPS, ppsCRTCTarg[0]); LOAD_PS(SH_CRTCTARGINTERPS, ppsCRTCTarg[1]); LOAD_PS(SH_CRTCPS, ppsCRTC[0]); LOAD_PS(SH_CRTCINTERPS, ppsCRTC[1]); LOAD_PS(SH_CRTC24PS, ppsCRTC24[0]); LOAD_PS(SH_CRTC24INTERPS, ppsCRTC24[1]); LOAD_PS(SH_ZEROPS|mask, ppsOne); LOAD_PS(SH_BASETEXTUREPS, ppsBaseTexture); LOAD_PS(SH_CONVERT16TO32PS, ppsConvert16to32); LOAD_PS(SH_CONVERT32TO16PS, ppsConvert32to16); return S_OK; } LPD3DPS 
ZeroGS::LoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context) { int texwrap; assert( texfilter < NUM_FILTERS ); if(g_nPixelShaderVer == SHADER_20 ) texfilter = 0; if(g_nPixelShaderVer == SHADER_20 ) exactcolor = 0; if( clamp.wms == clamp.wmt ) { switch( clamp.wms ) { case 0: texwrap = TEXWRAP_REPEAT; break; case 1: texwrap = TEXWRAP_CLAMP; break; case 2: texwrap = TEXWRAP_CLAMP; break; default: texwrap = TEXWRAP_REGION_REPEAT; break; } } else if( clamp.wms==3||clamp.wmt==3) texwrap = TEXWRAP_REGION_REPEAT; else texwrap = TEXWRAP_REPEAT_CLAMP; int index = GET_SHADER_INDEX(type, texfilter, texwrap, fog, s_bWriteDepth, testaem, exactcolor, context, 0); assert( index < ArraySize(ppsTexture) ); LPD3DPS* pps = ppsTexture+index; if( *pps != NULL ) return *pps; index += NUM_SHADERS*g_nPixelShaderVer; assert( mapShaderResources.find(index) != mapShaderResources.end() ); SHADERHEADER* header = mapShaderResources[index]; if( header == NULL ) DEBUG_LOG("%d %d\n", index%NUM_SHADERS, g_nPixelShaderVer); assert( header != NULL ); HRESULT hr = pd3dDevice->CreatePixelShader((DWORD*)(s_lpShaderResources + header->offset), pps); if( SUCCEEDED(hr) ) return *pps; if( g_bDisplayMsg ) ERROR_LOG("Failed to create shader %d,%d,%d,%d\n", 3, fog, texfilter, 4*clamp.wms+clamp.wmt); return NULL; } #else // not RELEASE_TO_PUBLIC //#define EFFECT_NAME "f:\\ps2dev\\pcsx2\\zerogs\\dx\\" #define EFFECT_NAME ".\\" #define COMPILE_SHADER(name, type, flags) class ZeroGSShaderInclude : public ID3DXInclude { public: int context; STDMETHOD(Open)(D3DXINCLUDE_TYPE IncludeType, LPCSTR pFileName, LPCVOID pParentData, LPCVOID *ppData, UINT *pBytes) { const char* pfilename = pFileName; char strfile[255]; if( strstr(pFileName, "ps2hw_ctx") != NULL ) { _snprintf(strfile, 255, "%sps2hw_ctx%d.fx", EFFECT_NAME, context); pfilename = strfile; } else if( strstr(pFileName, "\\") == NULL ) { _snprintf(strfile, 255, "%s%s", EFFECT_NAME, pFileName); pfilename 
= strfile; } FILE* f = fopen(pfilename, "rb"); if( f == NULL ) return E_FAIL; fseek(f, 0, SEEK_END); DWORD size = ftell(f); fseek(f, 0, SEEK_SET); char* buffer = new char[size+1]; fread(buffer, size, 1, f); buffer[size] = 0; *ppData = buffer; *pBytes = size; fclose(f); return S_OK; } STDMETHOD(Close)(LPCVOID pData) { delete[] (char*)pData; return S_OK; } }; #define LOAD_VS(name, ptr, shaderver) { \ LPD3DXBUFFER pShader, pError; \ V(D3DXCompileShaderFromFile(EFFECT_NAME"ps2hw.fx", pmacros, pInclude, name, shaderver, ShaderFlagsVS, &pShader, &pError, NULL)); \ if( FAILED(hr) ) \ { \ DEBUG_LOG("Failed to load vs %s: \n%s\n", name, reinterpret_cast(pError->GetBufferPointer())); \ SAFE_RELEASE(pShader); \ SAFE_RELEASE(pError); \ return hr; \ } \ hr = pd3dDevice->CreateVertexShader((const DWORD*)pShader->GetBufferPointer(), &(ptr)); \ SAFE_RELEASE(pShader); \ SAFE_RELEASE(pError); \ } \ #define LOAD_PS(name, ptr, shmodel) { \ LPD3DXBUFFER pShader, pError; \ SAFE_RELEASE(ptr); \ V(D3DXCompileShaderFromFile(EFFECT_NAME"ps2hw.fx", pmacros, pInclude, name, shmodel, ShaderFlagsPS, &pShader, &pError, NULL)); \ if( FAILED(hr) ) \ { \ DEBUG_LOG("Failed to load ps %s: \n%s\n", name, reinterpret_cast(pError->GetBufferPointer())); \ SAFE_RELEASE(pShader); \ SAFE_RELEASE(pError); \ return hr; \ } \ hr = pd3dDevice->CreatePixelShader((const DWORD*)pShader->GetBufferPointer(), &(ptr)); \ SAFE_RELEASE(pShader); \ SAFE_RELEASE(pError); \ if( FAILED(hr) || ptr == NULL ) { \ DEBUG_LOG("errors 0x%x for %s, failed.. try updating your drivers or dx\n", hr, name); \ return E_FAIL; \ } \ } \ HRESULT ZeroGS::LoadEffects() { // clear the textures for(int i = 0; i < ArraySize(ppsTexture); ++i) { SAFE_RELEASE(ppsTexture[i]); } memset(ppsTexture, 0, sizeof(ppsTexture)); return S_OK; } #define VS_VER (g_nPixelShaderVer == SHADER_20?"vs_2_0":"vs_3_0") #define PS_VER (g_nPixelShaderVer == SHADER_20?"ps_2_0":"ps_3_0") HRESULT ZeroGS::LoadExtraEffects() { HRESULT hr; DWORD ShaderFlagsPS = !DEBUG_PS2 ? 
0 : (D3DXSHADER_DEBUG|D3DXSHADER_SKIPOPTIMIZATION);
    // Development build of LoadExtraEffects: compiles every fixed vertex and
    // pixel shader from ps2hw.fx through the LOAD_VS / LOAD_PS macros, which
    // read hr, pmacros, pInclude and the ShaderFlags* locals declared here and
    // return from this function on failure. Debug info and unoptimised code are
    // requested only when DEBUG_PS2 is non-zero.
    DWORD ShaderFlagsVS = !DEBUG_PS2 ? 0 : (D3DXSHADER_DEBUG|D3DXSHADER_SKIPOPTIMIZATION);

    // include handler; inc.context picks which ps2hw_ctx<N>.fx gets included
    ZeroGSShaderInclude inc;
    inc.context = 0;
    ZeroGSShaderInclude* pInclude = &inc;

    //assert( g_nPixelShaderVer == SHADER_30) ;
    // profile for the utility pixel shaders (blit / CRTC / conversion)
    const char* pstrps = g_nPixelShaderVer == SHADER_20 ? "ps_2_0" : "ps_2_a";
    const char* pvsshaders[4] = { "RegularVS", "TextureVS", "RegularFogVS", "TextureFogVS" };

    // macros[] is { WRITE_DEPTH=1, zero terminator }; pmacros switches between
    // "no defines" (NULL) and that list
    D3DXMACRO macros[2] = {0};
    D3DXMACRO* pmacros = NULL;

    macros[0].Name = "WRITE_DEPTH";
    macros[0].Definition = "1";

    // four variants per vertex shader: context 0/1, without and with
    // WRITE_DEPTH (the write-depth variants live 8 slots further on in pvs[])
    for(int i = 0; i < 4; ++i) {
        pmacros = NULL;
        inc.context = 0;
        LOAD_VS(pvsshaders[i], pvs[2*i], VS_VER);
        inc.context = 1;
        LOAD_VS(pvsshaders[i], pvs[2*i+1], VS_VER);

        pmacros = macros;
        inc.context = 0;
        LOAD_VS(pvsshaders[i], pvs[2*i+8], VS_VER);
        inc.context = 1;
        LOAD_VS(pvsshaders[i], pvs[2*i+8+1], VS_VER);
    }

    // untextured pixel shaders: [0]/[1] plain and fog, [2]/[3] the same with
    // WRITE_DEPTH defined
    inc.context = 0;
    pmacros = NULL;
    LOAD_PS("RegularPS", ppsRegular[0], PS_VER);
    LOAD_PS("RegularFogPS", ppsRegular[1], PS_VER);

    pmacros = macros;
    LOAD_PS("RegularPS", ppsRegular[2], PS_VER);
    LOAD_PS("RegularFogPS", ppsRegular[3], PS_VER);

    // utility shaders, compiled without extra defines
    pmacros = NULL;
    LOAD_VS("BitBltVS", pvsBitBlt, "vs_2_0");
    LOAD_PS("BitBltPS", ppsBitBlt[0], pstrps);
    LOAD_PS("BitBltAAPS", ppsBitBlt[1], pstrps);
    LOAD_PS("BitBltDepthPS", ppsBitBltDepth[0], pstrps);
    LOAD_PS("BitBltDepthMRTPS", ppsBitBltDepth[1], pstrps);
    LOAD_PS("BitBltDepthTexPS", ppsBitBltDepthTex[0], pstrps);
    LOAD_PS("BitBltDepthTexMRTPS", ppsBitBltDepthTex[1], pstrps);
    LOAD_PS("CRTCTargPS", ppsCRTCTarg[0], pstrps);
    LOAD_PS("CRTCTargInterPS", ppsCRTCTarg[1], pstrps);
    LOAD_PS("CRTCPS", ppsCRTC[0], pstrps);
    LOAD_PS("CRTCInterPS", ppsCRTC[1], pstrps);
    LOAD_PS("CRTC24PS", ppsCRTC24[0], pstrps);
    LOAD_PS("CRTC24InterPS", ppsCRTC24[1], pstrps);
    LOAD_PS("ZeroPS", ppsOne, PS_VER);
    LOAD_PS("BaseTexturePS", ppsBaseTexture, pstrps);
    LOAD_PS("Convert16to32PS", ppsConvert16to32, pstrps);
    LOAD_PS("Convert32to16PS", ppsConvert32to16, pstrps);

    return S_OK;
}

// Development build: returns the texture pixel shader for the given parameter
// combination, compiling it from ps2hw.fx on first use and caching it in
// ppsTexture. Returns NULL when compilation fails.
LPD3DPS ZeroGS::LoadShadeEffect(int type, int
texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context)
{
    int texwrap;

    assert( texfilter < NUM_FILTERS );
    //assert( g_nPixelShaderVer == SHADER_30 );

    // map the GS clamp modes of both axes onto one of the shader wrap variants
    if( clamp.wms == clamp.wmt ) {
        switch( clamp.wms ) {
            case 0: texwrap = TEXWRAP_REPEAT; break;
            case 1: texwrap = TEXWRAP_CLAMP; break;
            case 2: texwrap = TEXWRAP_CLAMP; break;
            default:
                texwrap = TEXWRAP_REGION_REPEAT; break;
        }
    }
    else if( clamp.wms==3||clamp.wmt==3)
        texwrap = TEXWRAP_REGION_REPEAT;
    else
        texwrap = TEXWRAP_REPEAT_CLAMP;

    // NOTE(review): unlike the RELEASE_TO_PUBLIC variant, index is not checked
    // against ArraySize(ppsTexture) before it is used
    int index = GET_SHADER_INDEX(type, texfilter, texwrap, fog, s_bWriteDepth, testaem, exactcolor, context, 0);

    LPD3DPS* pps = ppsTexture+index;

    // already compiled
    if( *pps != NULL )
        return *pps;

    ZeroGSShaderInclude inc;
    inc.context = context;
    HRESULT hr = LoadShaderFromType(EFFECT_NAME"ps2hw.fx", type, texfilter, texwrap, fog, s_bWriteDepth, testaem, exactcolor,
                                    g_nPixelShaderVer, 0, pd3dDevice, &inc, pps);

    if( SUCCEEDED(hr) )
        return *pps;

    DEBUG_LOG("Failed to create shader %d,%d,%d,%d\n", type, fog, texfilter, 4*clamp.wms+clamp.wmt);

    return NULL;
}

#endif // RELEASE_TO_PUBLIC

// (Re)creates every resource that has to be rebuilt with the device: sprite and
// logo, the two dynamic vertex buffers, the block lookup textures, the full
// screen quad, the 16<->32 bit conversion textures, and the default sampler and
// render states. Starts by calling DeleteDeviceObjects(), so it can be called
// repeatedly. Returns the failing HRESULT of any V_RETURN-wrapped call,
// otherwise S_OK.
HRESULT ZeroGS::InitDeviceObjects()
{
    //g_GameSettings |= 0;//GAME_VSSHACK|GAME_FULL16BITRES|GAME_NODEPTHRESOLVE|GAME_FASTUPDATE;
    //s_bWriteDepth = TRUE;
    DeleteDeviceObjects();

    int i;
    HRESULT hr;

    SETRS(D3DRS_SRCBLEND, D3DBLEND_ONE);
    SETRS(D3DRS_DESTBLEND, D3DBLEND_ONE);

    if( pFont )
        V_RETURN( pFont->OnResetDevice() );
    V_RETURN( D3DXCreateSprite( pd3dDevice, &pSprite ) );

    // the logo is wrapped in V() rather than V_RETURN(): failure is not fatal
    V(D3DXCreateTextureFromResource(pd3dDevice, hInst, MAKEINTRESOURCE( IDB_ZEROGSLOGO ), &ptexLogo));

    // one dynamic vertex buffer per GS context
    for(i = 0; i < 2; ++i) {
        V_RETURN(pd3dDevice->CreateVertexBuffer( sizeof(VertexGPU) * POINT_BUFFERSIZE, D3DUSAGE_DYNAMIC|D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &vb[i].pvb, NULL));
    }

    // create the blocks texture
    // preferred format is R32F; fall back to G16R16 with a matching multiplier
    D3DFORMAT blockfmt = D3DFMT_R32F;
    g_fBlockMult = 1;

    if( FAILED(hr = pd3dDevice->CreateTexture(BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 1, 0, blockfmt, D3DPOOL_MANAGED, &ptexBlocks, NULL)) ) {
        blockfmt = D3DFMT_G16R16;
        g_fBlockMult = 65535.0f*(float)g_fiGPU_TEXWIDTH;
        V_RETURN(pd3dDevice->CreateTexture(BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 1, 0, blockfmt, D3DPOOL_MANAGED, &ptexBlocks, NULL));
    }

    // the bilinear block texture is only attempted with the float format; its
    // absence is tolerated (GetTexFilter checks ptexBilinearBlocks for NULL)
    if( blockfmt == D3DFMT_R32F ) {
        if( FAILED(hr = pd3dDevice->CreateTexture(BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 1, 0, D3DFMT_A32B32G32R32F, D3DPOOL_MANAGED, &ptexBilinearBlocks, NULL)) ) {
            DEBUG_LOG("Failed to create bilinear block texture, fmt = D3DFMT_A32B32G32R32F\n");
        }
    }
    else
        ptexBilinearBlocks = NULL;

    // fill a simple rect
    // four vertices covering -0x7fff..0x7fff in x/y with s/t running 0..1
    V_RETURN(pd3dDevice->CreateVertexBuffer( 4 * sizeof(VertexGPU), D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &pvbRect, NULL));
    VertexGPU* pvert;
    // NOTE(review): the result of Lock is not checked before pvert is written
    pvbRect->Lock(0, 0, (void**)&pvert, 0);
    pvert->x = -0x7fff; pvert->y = 0x7fff; pvert->z = 0; pvert->s = 0; pvert->t = 0; pvert++;
    pvert->x = 0x7fff; pvert->y = 0x7fff; pvert->z = 0; pvert->s = 1; pvert->t = 0; pvert++;
    pvert->x = -0x7fff; pvert->y = -0x7fff; pvert->z = 0; pvert->s = 0; pvert->t = 1; pvert++;
    pvert->x = 0x7fff; pvert->y = -0x7fff; pvert->z = 0; pvert->s = 1; pvert->t = 1; pvert++;
    pvbRect->Unlock();

    // BLOCK::FillBlocks receives both locked textures; the bilinear one is
    // passed as NULL when it could not be created
    D3DLOCKED_RECT lock, lockbilinear;
    ptexBlocks->LockRect(0, &lock, NULL, 0);
    if( ptexBilinearBlocks != NULL )
        ptexBilinearBlocks->LockRect(0, &lockbilinear, NULL, 0);
    BLOCK::FillBlocks(&lock, ptexBilinearBlocks != NULL ? &lockbilinear : NULL, blockfmt);
    ptexBlocks->UnlockRect(0);
    if( ptexBilinearBlocks != NULL )
        ptexBilinearBlocks->UnlockRect(0);

    // create the conversion textures
    // 256x256 table: entry i holds RGBA16to32(i) with red and blue swapped
    V_RETURN(pd3dDevice->CreateTexture(256, 256, 1, 0, D3DFMT_A8R8G8B8, D3DPOOL_MANAGED, &ptexConv16to32, NULL));
    ptexConv16to32->LockRect(0, &lock, NULL, 0);
    assert(lock.Pitch == 256*4);
    u32* dst = (u32*)lock.pBits;
    for(i = 0; i < 256*256; ++i) {
        DWORD tempcol = RGBA16to32(i);
        // have to flip r and b
        *dst++ = (tempcol&0xff00ff00)|((tempcol&0xff)<<16)|((tempcol&0xff0000)>>16);
    }
    ptexConv16to32->UnlockRect(0);

    // 32x32x32 volume table indexed by three 5-bit values; each texel stores
    // the packed 15-bit value split over two channels
    // NOTE(review): written linearly - assumes RowPitch == 32*4 and
    // SlicePitch == 32*32*4, which is not asserted the way the 2D pitch is
    V_RETURN(pd3dDevice->CreateVolumeTexture(32, 32, 32, 1, 0, D3DFMT_A8R8G8B8, D3DPOOL_MANAGED, &ptexConv32to16, NULL));
    D3DLOCKED_BOX lockbox;
    ptexConv32to16->LockBox(0, &lockbox, NULL, 0);
    dst = (u32*)lockbox.pBits;
    for(i = 0; i < 32; ++i) {
        for(int j = 0; j < 32; ++j) {
            for(int k = 0; k < 32; ++k) {
                u32 col = (i<<10)|(j<<5)|k;
                *dst++ = ((col&0xff)<<16)|(col&0xff00);
            }
        }
    }
    ptexConv32to16->UnlockBox(0);

    // set samplers
    // default for samplers 0-7: point filtering, clamped addressing
    for(i = 0; i < 8; ++i) {
        pd3dDevice->SetSamplerState(i, D3DSAMP_MINFILTER, D3DTEXF_POINT);
        pd3dDevice->SetSamplerState(i, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
        pd3dDevice->SetSamplerState(i, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
        pd3dDevice->SetSamplerState(i, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
    }

    //pd3dDevice->SetSamplerState(SAMP_SRC, D3DSAMP_ADDRESSU, D3DTADDRESS_WRAP);
    //pd3dDevice->SetSamplerState(SAMP_SRC, D3DSAMP_ADDRESSV, D3DTADDRESS_WRAP);
    // the lookup-table samplers wrap instead of clamping
    pd3dDevice->SetSamplerState(SAMP_BLOCKS, D3DSAMP_ADDRESSU, D3DTADDRESS_WRAP);
    pd3dDevice->SetSamplerState(SAMP_BLOCKS, D3DSAMP_ADDRESSV, D3DTADDRESS_WRAP);
    pd3dDevice->SetSamplerState(SAMP_BILINEARBLOCKS, D3DSAMP_ADDRESSU, D3DTADDRESS_WRAP);
    pd3dDevice->SetSamplerState(SAMP_BILINEARBLOCKS, D3DSAMP_ADDRESSV, D3DTADDRESS_WRAP);
    pd3dDevice->SetSamplerState(SAMP_BILINEARBLOCKS, D3DSAMP_ADDRESSW, D3DTADDRESS_CLAMP); // can be used as a 3d texture
    pd3dDevice->SetSamplerState(SAMP_BITWISEANDX, D3DSAMP_ADDRESSU, D3DTADDRESS_WRAP);
pd3dDevice->SetSamplerState(SAMP_BITWISEANDX, D3DSAMP_ADDRESSV, D3DTADDRESS_WRAP); pd3dDevice->SetSamplerState(SAMP_BITWISEANDY, D3DSAMP_ADDRESSU, D3DTADDRESS_WRAP); pd3dDevice->SetSamplerState(SAMP_BITWISEANDY, D3DSAMP_ADDRESSV, D3DTADDRESS_WRAP); pd3dDevice->SetSamplerState(SAMP_FINAL, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); pd3dDevice->SetSamplerState(SAMP_FINAL, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); pd3dDevice->SetTexture(SAMP_BLOCKS, ptexBlocks); pd3dDevice->SetTexture(SAMP_BILINEARBLOCKS, ptexBilinearBlocks); pd3dDevice->SetVertexDeclaration(pdecl); SETRS(D3DRS_STENCILENABLE, FALSE); SETRS(D3DRS_SCISSORTESTENABLE, 1); SETRS(D3DRS_SEPARATEALPHABLENDENABLE, USEALPHABLENDING); SETRS(D3DRS_SRCBLENDALPHA, D3DBLEND_ONE); SETRS(D3DRS_DESTBLENDALPHA, D3DBLEND_ZERO); SETRS(D3DRS_CULLMODE, D3DCULL_NONE); SETRS(D3DRS_BLENDFACTOR, 0x80000000); SETRS(D3DRS_COLORWRITEENABLE1, 0); // points SETRS(D3DRS_POINTSCALEENABLE, FALSE); SETRS(D3DRS_POINTSIZE, FtoDW(1.0f)); g_nDepthBias = 0; SETRS(D3DRS_DEPTHBIAS, FtoDW(0.000015f)); SETCONSTF(GPU_Z, g_vdepth);//vb[icurctx].zbuf.psm&3]); s_vznorm = DXVEC4(g_filog32, 0, 0,0); SETCONSTF(GPU_ZNORM, s_vznorm); return S_OK; } void ZeroGS::DeleteDeviceObjects() { if( s_aviinit ) { StopCapture(); STOP_AVI(); DEBUG_LOG("zerogs.avi stopped"); s_aviinit = 0; } SAFE_RELEASE(s_ptexAVICapture); if( pFont ) pFont->OnLostDevice(); SAFE_RELEASE(pSprite); g_MemTargs.Destroy(); s_RTs.Destroy(); s_DepthRTs.Destroy(); s_BitwiseTextures.Destroy(); SAFE_RELEASE(s_ptexInterlace); SAFE_RELEASE(pvbRect); SAFE_RELEASE(ptexBlocks); SAFE_RELEASE(ptexBilinearBlocks); SAFE_RELEASE(ptexConv16to32); SAFE_RELEASE(ptexConv32to16); s_bAlphaSet = FALSE; vb[0].Unlock(); SAFE_RELEASE(vb[0].pvb); vb[1].Unlock(); SAFE_RELEASE(vb[1].pvb); } void ZeroGS::Prim() { if( g_bIsLost ) return; VB& curvb = vb[prim->ctxt]; if( curvb.CheckPrim() ) Flush(prim->ctxt); curvb.curprim._val = prim->_val; // flush the other pipe if sharing the same buffer // if( vb[prim->ctxt].gsfb.fbp == 
vb[!prim->ctxt].gsfb.fbp && vb[!prim->ctxt].dwCount > 0 ) // { // assert( vb[prim->ctxt].dwCount == 0 ); // Flush(!prim->ctxt); // } curvb.curprim.prim = prim->prim; vb[prim->ctxt].Lock(); } int GetTexFilter(const tex1Info& tex1) { // always force if( conf.bilinear == 2 ) return 1; int texfilter = 0; if( conf.bilinear && ptexBilinearBlocks != NULL ) { if( tex1.mmin <= 1 ) texfilter = tex1.mmin|tex1.mmag; else texfilter = tex1.mmag ? ((tex1.mmin+2)&5) : tex1.mmin; texfilter = texfilter == 1 || texfilter == 4 || texfilter == 5; } return texfilter; } void ZeroGS::ReloadEffects() { #ifndef RELEASE_TO_PUBLIC for(int i = 0; i < ArraySize(ppsTexture); ++i) { SAFE_RELEASE(ppsTexture[i]); } memset(ppsTexture, 0, sizeof(ppsTexture)); LoadExtraEffects(); #endif } static int s_ClutResolve = 0; static int s_PSM8Resolve = 0; void ZeroGS::Flush(int context) { assert( context >= 0 && context <= 1 ); #ifndef RELEASE_TO_PUBLIC if( g_bUpdateEffect ) { ReloadEffects(); g_bUpdateEffect = 0; } #endif VB& curvb = vb[context]; const pixTest curtest = curvb.test; if( curvb.dwCount == 0 || (curtest.zte && curtest.ztst == 0) || g_bIsLost ) { curvb.dwCount = 0; return; } if( s_RangeMngr.ranges.size() > 0 ) { // don't want infinite loop DWORD prevcount = curvb.dwCount; curvb.dwCount = 0; FlushTransferRanges(curvb.curprim.tme ? 
&curvb.tex0 : NULL); curvb.dwCount = prevcount; //if( curvb.dwCount == 0 ) // return; } if( curvb.bNeedTexCheck ) { curvb.FlushTexData(); if( curvb.dwCount == 0 ) return; } if( !s_bBeginScene ) { pd3dDevice->BeginScene(); s_bBeginScene = TRUE; } curvb.Unlock(); LPD3DTEX ptexRenderTargetCached = NULL; int cachedtbp0, cachedtbw, cachedtbh; //s_bWriteDepth = TRUE; //static int lasttime = 0; //fprintf(gsLog, "%d: %d\n", g_SaveFrameNum, timeGetTime()-lasttime); //lasttime = timeGetTime(); if( curvb.bNeedFrameCheck || curvb.bNeedZCheck ) { int tpsm = curvb.tex0.psm; if( curvb.bNeedTexCheck ) tpsm = (curvb.uNextTex0Data[0] >> 20) & 0x3f; if( tpsm == PSMT8H && (g_GameSettings&GAME_NOTARGETCLUT) ) { curvb.dwCount = 0; return; } // check for the texture before checking the frame (since things could get destroyed) if( (g_GameSettings&GAME_PARTIALPOINTERS) &&curvb.curprim.tme ) { // if( (curvb.gsfb.fbp&0xff) != 0 ) { // curvb.dwCount = 0; // return; // } // if texture is part of a previous target, use that instead int tbw = curvb.tex0.tbw; int tbp0 = curvb.tex0.tbp0; if( curvb.bNeedTexCheck ) { // not yet initied, but still need to get correct target! (xeno3 ingame) tbp0 = (curvb.uNextTex0Data[0] & 0x3fff); tbw = ((curvb.uNextTex0Data[0] >> 14) & 0x3f) * 64; } if( (tpsm&~1) == 0 ) { CRenderTarget* ptemptarg = s_RTs.GetTarg(tbp0, tbw); if( ptemptarg != NULL && (ptemptarg->psm&~1) == (tpsm&~1) ) { ptexRenderTargetCached = ptemptarg->ptex; ptexRenderTargetCached->AddRef(); cachedtbp0 = ptemptarg->fbp; cachedtbw = ptemptarg->fbw; cachedtbh = ptemptarg->fbh; } } } curvb.CheckFrame(curvb.curprim.tme ? 
curvb.tex0.tbp0 : 0); } // if( g_SaveFrameNum == 976 ) { // curvb.prndr->ConvertTo32(); // } if( curvb.prndr == NULL || curvb.pdepth == NULL ) { WARN_LOG("Current render target NULL (ctx: %d)", context); curvb.dwCount = 0; SAFE_RELEASE(ptexRenderTargetCached); return; } #if defined(PRIM_LOG) && defined(PCSX2_DEBUG) static const char* patst[8] = { "NEVER", "ALWAYS", "LESS", "LEQUAL", "EQUAL", "GEQUAL", "GREATER", "NOTEQUAL"}; static const char* pztst[4] = { "NEVER", "ALWAYS", "GEQUAL", "GREATER" }; static const char* pafail[4] = { "KEEP", "FB_ONLY", "ZB_ONLY", "RGB_ONLY" }; PRIM_LOG("**Drawing ctx %d, num %d, fbp: 0x%x, zbp: 0x%x, fpsm: %d, zpsm: %d, fbw: %d\n", context, vb[context].dwCount, curvb.prndr->fbp, curvb.zbuf.zbp, curvb.prndr->psm, curvb.zbuf.psm, curvb.prndr->fbw); PRIM_LOG("prim: prim=%x iip=%x tme=%x fge=%x abe=%x aa1=%x fst=%x ctxt=%x fix=%x\n", curvb.curprim.prim, curvb.curprim.iip, curvb.curprim.tme, curvb.curprim.fge, curvb.curprim.abe, curvb.curprim.aa1, curvb.curprim.fst, curvb.curprim.ctxt, curvb.curprim.fix); PRIM_LOG("test: ate:%d, atst: %s, aref: %d, afail: %s, date: %d, datm: %d, zte: %d, ztst: %s, fba: %d\n", curvb.test.ate, patst[curvb.test.atst], curvb.test.aref, pafail[curvb.test.afail], curvb.test.date, curvb.test.datm, curvb.test.zte, pztst[curvb.test.ztst], curvb.fba.fba); PRIM_LOG("alpha: A%d B%d C%d D%d FIX:%d pabe: %d; aem: %d, ta0: %d, ta1: %d\n", curvb.alpha.a, curvb.alpha.b, curvb.alpha.c, curvb.alpha.d, curvb.alpha.fix, gs.pabe, gs.texa.aem, gs.texa.ta[0], gs.texa.ta[1]); PRIM_LOG("tex0: tbp0=0x%x, tbw=%d, psm=0x%x, tw=%d, th=%d, tcc=%d, tfx=%d, cbp=0x%x, cpsm=0x%x, csm=%d, csa=%d, cld=%d\n", curvb.tex0.tbp0, curvb.tex0.tbw, curvb.tex0.psm, curvb.tex0.tw, curvb.tex0.th, curvb.tex0.tcc, curvb.tex0.tfx, curvb.tex0.cbp, curvb.tex0.cpsm, curvb.tex0.csm, curvb.tex0.csa, curvb.tex0.cld); PRIM_LOG("frame: %d\n\n", g_SaveFrameNum); #endif CMemoryTarget* pmemtarg = NULL; CRenderTarget* ptextarg = NULL; // kh2 hack // if( curvb.dwCount 
== 2 && curvb.curprim.tme == 0 && curvb.curprim.abe == 0 && (curvb.tex0.tbp0 == 0x2a00 || curvb.tex0.tbp0==0x1d00) ) { // // skip // DEBUG_LOG("skipping\n"); // g_SaveFrameNum++; // curvb.dwCount = 0; // return; // } if( curtest.date || gs.pabe ) SetDestAlphaTest(); // set the correct pixel shaders if( curvb.curprim.tme && ptexRenderTargetCached == NULL ) { // if texture is part of a previous target, use that instead int tbw = curvb.tex0.tbw; int tbp0 = curvb.tex0.tbp0; int tpsm = curvb.tex0.psm; if( curvb.bNeedTexCheck ) { // not yet initied, but still need to get correct target! (xeno3 ingame) tbp0 = (curvb.uNextTex0Data[0] & 0x3fff); tbw = ((curvb.uNextTex0Data[0] >> 14) & 0x3f) * 64; tpsm = (curvb.uNextTex0Data[0] >> 20) & 0x3f; } ptextarg = s_RTs.GetTarg(tbp0, tbw); if( ptextarg == NULL && tpsm == PSMT8 ) { // check for targets with half the width ptextarg = s_RTs.GetTarg(tbp0, tbw/2); if( ptextarg == NULL ) { tbp0 &= ~0x7ff; ptextarg = s_RTs.GetTarg(tbp0, tbw/2); // mgs3 hack if( ptextarg == NULL ) { // check the next level (mgs3) tbp0 &= ~0xfff; ptextarg = s_RTs.GetTarg(tbp0, tbw/2); // mgs3 hack } if( ptextarg != NULL && ptextarg->start > tbp0*256 ) { // target beyond range, so ignore ptextarg = NULL; } } // if( ptextarg != NULL ) { // // make sure target isn't invalidated by the ranges // for(vector::iterator itrange = s_RangeMngr.ranges.begin(); itrange != s_RangeMngr.ranges.end(); ++itrange ) { // // int start = itrange->start; // int end = itrange->end; // // // if start and end are in the range or there's a range that is between tbp0 and start, then remove // if( (start <= tbp0*256 && end > tbp0*256) || (start >= ptextarg->fbp*256 && start <= tbp0*256) ) { // ptextarg = NULL; // break; // } // } // } if( ptextarg != NULL && !(ptextarg->status&CRenderTarget::TS_NeedUpdate) ) { // find the equivalent memtarg if( s_PSM8Resolve == 0 ) { //|| (s_PSM8Resolve > 0 && s_PSM8Resolve+128 < g_SaveFrameNum) ) { DWORD prevcount = curvb.dwCount; curvb.dwCount = 0; 
if( ptextarg->pmimicparent != NULL ) ptextarg->pmimicparent->Resolve(); else ptextarg->Resolve(); curvb.dwCount = prevcount; s_PSM8Resolve = g_SaveFrameNum; // stop from resolving again (once per frame) } tex0Info mytex0 = curvb.tex0; mytex0.tbp0 = tbp0; if( ptextarg->pmimicparent != NULL ) { mytex0.tbp0 = ptextarg->pmimicparent->fbp; } pmemtarg = g_MemTargs.GetMemoryTarget(mytex0, 1); // have to add an offset to all texture reads mytex0.tbp0 = tbp0; // change so that SetTexVariablesInt can set the right offsets SetTexVariablesInt(context, GetTexFilter(curvb.tex1), mytex0, pmemtarg, s_bForceTexFlush); curvb.bVarsTexSync = TRUE; ptextarg = NULL; // won't be needing this anymore } } if( (tpsm&0x30)==0x30 && ptextarg == NULL ) { // try depth ptextarg = s_DepthRTs.GetTarg(tbp0, tbw); } if( ptextarg == NULL && (g_GameSettings&GAME_TEXTURETARGS) ) { // check if any part of the texture intersects the current target if( !PSMT_ISCLUT(tpsm) && curvb.tex0.tbp0 >= curvb.frame.fbp && (curvb.tex0.tbp0 << 8) < curvb.prndr->end) { ptextarg = curvb.prndr; } } if( ptextarg != NULL && !(ptextarg->status&CRenderTarget::TS_NeedUpdate) ) { if( PSMT_ISCLUT(tpsm) && tpsm != PSMT8H && tpsm != PSMT8 ) { // handle 8h cluts // don't support clut targets, read from mem // 4hl - kh2 check if( tpsm != PSMT4HL && tpsm != PSMT4HH && s_ClutResolve <= 1 ) { // xenosaga requires 2 resolves DWORD prevcount = curvb.dwCount; curvb.dwCount = 0; ptextarg->Resolve(); s_ClutResolve++; curvb.dwCount = prevcount; } ptextarg = NULL; } else { if( ptextarg == curvb.prndr ) { // need feedback if( ptextarg->pmimicparent != NULL ) { // if the target is mimic, create the feedback of the parent assert( ptextarg->pmimicparent->ptex == ptextarg->ptex || ptextarg->pmimicparent->ptexFeedback == ptextarg->ptex ); SAFE_RELEASE(ptextarg->ptexFeedback); SAFE_RELEASE(ptextarg->psurfFeedback); ptextarg->pmimicparent->CreateFeedback(); ptextarg->ptex = ptextarg->pmimicparent->ptex; ptextarg->ptexFeedback = 
ptextarg->pmimicparent->ptexFeedback; ptextarg->ptexFeedback->AddRef(); ptextarg->psurf = ptextarg->pmimicparent->psurf; ptextarg->psurfFeedback = ptextarg->pmimicparent->psurfFeedback; ptextarg->psurfFeedback->AddRef(); } else curvb.prndr->CreateFeedback(); pd3dDevice->SetRenderTarget(1, (s_bWriteDepth && curvb.pdepth != NULL) ? curvb.pdepth->psurf : NULL); } } } else ptextarg = NULL; } #ifdef PCSX2_DEBUG if( g_bSaveFlushedFrame & 0x80000000 ) { char str[255]; sprintf(str, "rndr%u.tga", g_SaveFrameNum); D3DXSaveSurfaceToFile(str, D3DXIFF_TGA, curvb.prndr->psurf, NULL, NULL); } #endif if( conf.options & GSOPTION_WIREFRAME ) { // always render first few geometry as solid if( s_nWireframeCount > 0 ) { SETRS(D3DRS_FILLMODE, D3DFILL_SOLID); } } if( !curvb.bVarsSetTarg ) SetContextTarget(context); else { assert( curvb.pdepth != NULL ); if( curvb.pdepth->status & CRenderTarget::TS_Virtual) { if( !curvb.zbuf.zmsk ) { CRenderTarget* ptemp = s_DepthRTs.Promote(curvb.pdepth->fbp|(curvb.pdepth->fbw<<16)); assert( ptemp == curvb.pdepth ); } else curvb.pdepth->status &= ~CRenderTarget::TS_NeedUpdate; } if( (curvb.pdepth->status & CRenderTarget::TS_NeedUpdate) || (curvb.prndr->status & CRenderTarget::TS_NeedUpdate) ) SetContextTarget(context); } SetTexVariables(context); if( ptextarg == NULL && pmemtarg == NULL ) { pmemtarg = g_MemTargs.GetMemoryTarget(curvb.tex0, 1); if( vb[context].bVarsTexSync ) { if( vb[context].pmemtarg != pmemtarg ) { SetTexVariablesInt(context, GetTexFilter(curvb.tex1), curvb.tex0, pmemtarg, s_bForceTexFlush); vb[context].bVarsTexSync = TRUE; } } else { SetTexVariablesInt(context, GetTexFilter(curvb.tex1), curvb.tex0, pmemtarg, s_bForceTexFlush); vb[context].bVarsTexSync = TRUE; INC_TEXVARS(); } } icurctx = context; assert( !(curvb.prndr->status&CRenderTarget::TS_NeedUpdate) ); curvb.prndr->status = 0; if( curvb.pdepth != NULL ) { assert( !(curvb.pdepth->status&CRenderTarget::TS_NeedUpdate) ); if( !curvb.zbuf.zmsk ) { assert( !(curvb.pdepth->status & 
CRenderTarget::TS_Virtual) ); curvb.pdepth->status = 0; } } s_dwColorWrite = (curvb.prndr->psm&0xf) == 1 ? (D3DCOLORWRITEENABLE_BLUE|D3DCOLORWRITEENABLE_GREEN|D3DCOLORWRITEENABLE_RED) : 0xf; if( ((curvb.frame.fbm)&0xff) == 0xff) s_dwColorWrite &= ~D3DCOLORWRITEENABLE_RED; if( ((curvb.frame.fbm>>8)&0xff) == 0xff) s_dwColorWrite &= ~D3DCOLORWRITEENABLE_GREEN; if( ((curvb.frame.fbm>>16)&0xff) == 0xff) s_dwColorWrite &= ~D3DCOLORWRITEENABLE_BLUE; if( ((curvb.frame.fbm>>24)&0xff) == 0xff) s_dwColorWrite &= ~D3DCOLORWRITEENABLE_ALPHA; SETRS(D3DRS_COLORWRITEENABLE, s_dwColorWrite); pd3dDevice->SetScissorRect(&curvb.prndr->scissorrect); // need to always set it since something in this code resets it // set the shaders pd3dDevice->SetVertexShader(pvs[2*((curvb.curprim._val>>1)&3)+8*s_bWriteDepth+context]); pd3dDevice->SetStreamSource(0, curvb.pvb, curvb.dwCurOff*sizeof(VertexGPU), sizeof(VertexGPU)); DWORD dwUsingSpecialTesting = 0; DWORD dwFilterOpts = 0; IDirect3DPixelShader9* pps; // need exact if equal or notequal int exactcolor = 0; if( g_nPixelShaderVer != SHADER_20 ) // ffx2 breaks when ==7 exactcolor = (curtest.ate && curtest.aref <= 128) && (curtest.atst==4);//||curtest.atst==7); int shadertype = 0; // set the correct pixel shaders if( curvb.curprim.tme ) { if( curvb.ptexClamp[0] != NULL ) pd3dDevice->SetTexture(SAMP_BITWISEANDX, curvb.ptexClamp[0]); if( curvb.ptexClamp[1] != NULL ) pd3dDevice->SetTexture(SAMP_BITWISEANDY, curvb.ptexClamp[1]); if( ptexRenderTargetCached != NULL ) { DXVEC4 vpageoffset; vpageoffset.w = 0; int psm = curvb.tex0.psm; assert( !PSMT_ISCLUT(curvb.tex0.psm)); pps = LoadShadeEffect(1, 0, curvb.curprim.fge, curvb.tex0.tcc && gs.texa.aem && (psm == PSMCT24 || psm == PSMCT16 || psm == PSMCT16S), exactcolor, curvb.clamp, context); pd3dDevice->SetTexture(SAMP_MEMORY0+context, ptexRenderTargetCached); s_ptexCurSet[context] = ptexRenderTargetCached; if( curvb.tex1.mmag ) { pd3dDevice->SetSamplerState(SAMP_MEMORY0+context, D3DSAMP_MAGFILTER, 
D3DTEXF_LINEAR); dwFilterOpts |= 1; } if( curvb.tex1.mmin ) { pd3dDevice->SetSamplerState(SAMP_MEMORY0+context, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); dwFilterOpts |= 2; } DXVEC4 vTexDims; vTexDims.x = curvb.tex0.tw / (float)cachedtbw; vTexDims.y = curvb.tex0.th / (float)cachedtbh; // u32 tbp0 = curvb.tex0.tbp0 >> 5; // align to a page // int blockheight = 32; // int ycoord = ((curvb.tex0.tbp0-cachedtbp0)/(32*(cachedtbw>>6))) * blockheight; // int xcoord = (((curvb.tex0.tbp0-cachedtbp0)%(32*(cachedtbw>>6)))) * 2; //// xcoord += ptextarg->targoffx; //// ycoord += ptextarg->targoffy; // vTexDims.z = (float)xcoord / (float)cachedtbw; // vTexDims.w = (float)ycoord / (float)cachedtbh; vTexDims.z = vTexDims.w = 0; SETCONSTF(GPU_TEXDIMS0+context, vTexDims); SETCONSTF(GPU_PAGEOFFSET0+context, vpageoffset); if( g_bSaveTex ) D3DXSaveTextureToFile("tex.tga", D3DXIFF_TGA, ptexRenderTargetCached, NULL); } else if( ptextarg != NULL ) { if( ptextarg->IsDepth() ) SetWriteDepth(); DXVEC4 vpageoffset; vpageoffset.w = 0; shadertype = 1; if( (curvb.tex0.psm == PSMT8 || curvb.tex0.psm == PSMT8H) && !(g_GameSettings&GAME_NOTARGETCLUT) ) { // load the clut to memory LPD3DTEX ptexclut = NULL; pd3dDevice->CreateTexture(256, 1, 1, 0, (curvb.tex0.cpsm&2) ? D3DFMT_A1R5G5B5 : D3DFMT_A8R8G8B8, D3DPOOL_MANAGED, &ptexclut, NULL); if( ptexclut != NULL ) { D3DLOCKED_RECT lock; ptexclut->LockRect(0, &lock, NULL, D3DLOCK_NOSYSLOCK); // fill the buffer by decoding the clut int nClutOffset = 0, clutsize; int entries = (curvb.tex0.psm&3)==3 ? 
256 : 16; if( curvb.tex0.cpsm <= 1 ) { // 32 bit nClutOffset = 64 * curvb.tex0.csa; clutsize = min(entries, 256-curvb.tex0.csa*16)*4; } else { nClutOffset = 64 * (curvb.tex0.csa&15) + (curvb.tex0.csa>=16?2:0); clutsize = min(entries, 512-curvb.tex0.csa*16)*2; } if( curvb.tex0.cpsm <= 1 ) { // 32 bit memcpy_amd(lock.pBits, ZeroGS::g_pbyGSClut+nClutOffset, clutsize); } else { u16* pClutBuffer = (u16*)(ZeroGS::g_pbyGSClut + nClutOffset); u16* pclut = (u16*)lock.pBits; int left = ((u32)nClutOffset & 2) ? 0 : ((nClutOffset&0x3ff)/2)+clutsize-512; if( left > 0 ) clutsize -= left; while(clutsize > 0) { pclut[0] = pClutBuffer[0]; pclut++; pClutBuffer+=2; clutsize -= 2; } if( left > 0) { pClutBuffer = (u16*)(ZeroGS::g_pbyGSClut + 2); while(left > 0) { pclut[0] = pClutBuffer[0]; left -= 2; pClutBuffer += 2; pclut++; } } } ptexclut->UnlockRect(0); s_vecTempTextures.push_back(ptexclut); pd3dDevice->SetTexture(SAMP_FINAL, ptexclut); if( g_bSaveTex ) D3DXSaveTextureToFile("clut.tga", D3DXIFF_TGA, ptexclut, NULL); } if( g_nPixelShaderVer != SHADER_20 && (ptextarg->psm & 2) ) { // 16 bit texture shadertype = 4; DXVEC4 v; v.x = 16.0f / (float)ptextarg->fbw; v.y = 64.0f / (float)ptextarg->fbh; v.z = 0.5f * v.x; v.w = 0.5f * v.y; SETCONSTF(GPU_TEXOFFSET0, v); v.x = 1; v.y = -0.5f; v.z = 0; v.w = 0.0001f; SETCONSTF(GPU_PAGEOFFSET0, v); pd3dDevice->SetSamplerState(SAMP_BILINEARBLOCKS, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP); pd3dDevice->SetSamplerState(SAMP_BILINEARBLOCKS, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP); pd3dDevice->SetTexture(SAMP_BILINEARBLOCKS, ptexConv32to16); } else shadertype = 2; } else { if( PSMT_ISCLUT(curvb.tex0.psm) ) WARN_LOG("Using render target with CLUTs %d!\n", curvb.tex0.psm); else { if( (curvb.tex0.psm&2) != (ptextarg->psm&2) && (g_nPixelShaderVer != SHADER_20 || !curvb.curprim.fge) ) { if( curvb.tex0.psm & 2 ) { // converting from 32->16 shadertype = 3; DXVEC4 v; v.x = 16.0f / (float)curvb.tex0.tw; v.y = 64.0f / (float)curvb.tex0.th; v.z = 0.5f * v.x; v.w = 0.5f * 
v.y; SETCONSTF(GPU_TEXOFFSET0+context, v); vpageoffset.x = -0.1f / 256.0f; vpageoffset.y = -0.001f / 256.0f; vpageoffset.z = -0.1f / ptextarg->fbh; vpageoffset.w = ((ptextarg->psm&0x30)==0x30)?-1.0f:0.0f; pd3dDevice->SetSamplerState(SAMP_BILINEARBLOCKS, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP); pd3dDevice->SetSamplerState(SAMP_BILINEARBLOCKS, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP); pd3dDevice->SetTexture(SAMP_BILINEARBLOCKS, ptexConv16to32); } else { // converting from 16->32 WARN_LOG("ZeroGS: converting from 16 to 32bit RTs\n"); //shadetype = 4; } } } } int psm = curvb.tex0.psm; if( PSMT_ISCLUT(curvb.tex0.psm) ) psm = curvb.tex0.cpsm; pps = LoadShadeEffect(shadertype, 0, curvb.curprim.fge, curvb.tex0.tcc && gs.texa.aem && (psm == PSMCT24 || psm == PSMCT16 || psm == PSMCT16S), exactcolor, curvb.clamp, context); LPD3DTEX ptexset = ptextarg == curvb.prndr ? ptextarg->ptexFeedback : ptextarg->ptex; pd3dDevice->SetTexture(SAMP_MEMORY0+context, ptexset); s_ptexCurSet[context] = ptexset; if( curvb.tex1.mmag ) { pd3dDevice->SetSamplerState(SAMP_MEMORY0+context, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); dwFilterOpts |= 1; } if( curvb.tex1.mmin ) { pd3dDevice->SetSamplerState(SAMP_MEMORY0+context, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); dwFilterOpts |= 2; } DXVEC4 vTexDims; vTexDims.x = curvb.tex0.tw / (float)ptextarg->fbw; vTexDims.y = curvb.tex0.th / (float)ptextarg->targheight; // look at the offset of tbp0 from fbp if( curvb.tex0.tbp0 <= ptextarg->fbp ) { vTexDims.z = 0;//-0.5f/(float)ptextarg->fbw; vTexDims.w = 0;//0.2f/(float)ptextarg->fbh; } else { u32 tbp0 = curvb.tex0.tbp0 >> 5; // align to a page int blockheight = (ptextarg->psm&2) ? 
64 : 32; int ycoord = ((curvb.tex0.tbp0-ptextarg->fbp)/(32*(ptextarg->fbw>>6))) * blockheight; int xcoord = (((curvb.tex0.tbp0-ptextarg->fbp)%(32*(ptextarg->fbw>>6)))) * 2; xcoord += ptextarg->targoffx; ycoord += ptextarg->targoffy; vTexDims.z = (float)xcoord / (float)ptextarg->fbw; vTexDims.w = (float)ycoord / (float)ptextarg->targheight; } if( shadertype == 4 ) { vTexDims.z += 8.0f / (float)ptextarg->fbw; } SETCONSTF(GPU_TEXDIMS0+context, vTexDims); // zoe2 if( (ptextarg->psm&0x30) == 0x30 ) {//&& (psm&2) == (ptextarg->psm&2) ) { // target of zbuf has +1 added to it, don't do 16bit vpageoffset.w = -1; // DXVEC4 valpha2; // valpha2.x = 1; valpha2.y = 0; // valpha2.z = -1; valpha2.w = 0; // SETCONSTF(GPU_TEXALPHA20+context, &valpha2); } SETCONSTF(GPU_PAGEOFFSET0+context, vpageoffset); if( g_bSaveTex ) D3DXSaveTextureToFile("tex.tga", D3DXIFF_TGA, ptextarg == curvb.prndr ? ptextarg->ptexFeedback : ptextarg->ptex, NULL); } else { // save the texture #ifdef PCSX2_DEBUG // CMemoryTarget* pmemtarg = g_MemTargs.GetMemoryTarget(curvb.tex0, 0); // assert( curvb.pmemtarg == pmemtarg ); // if( PSMT_ISCLUT(curvb.tex0.psm) ) // assert( curvb.pmemtarg->ValidateClut(curvb.tex0) ); #endif //#ifdef ZEROGS_CACHEDCLEAR // if( !curvb.pmemtarg->ValidateTex(curvb.tex0, true) ) { // CMemoryTarget* pmemtarg = g_MemTargs.GetMemoryTarget(curvb.tex0, 1); // SetTexVariablesInt(context, GetTexFilter(curvb.tex1), curvb.tex0, pmemtarg, s_bForceTexFlush); // vb[context].bVarsTexSync = TRUE; // } //#endif if( g_bSaveTex ) { if( g_bSaveTex == 1 ) SaveTex(&curvb.tex0, 1); else SaveTex(&curvb.tex0, 0); } int psm = curvb.tex0.psm; if( PSMT_ISCLUT(curvb.tex0.psm) ) psm = curvb.tex0.cpsm; pps = LoadShadeEffect(0, GetTexFilter(curvb.tex1), curvb.curprim.fge, curvb.tex0.tcc && gs.texa.aem && (psm == PSMCT24 || psm == PSMCT16 || psm == PSMCT16S), exactcolor, curvb.clamp, context); } } else pps = ppsRegular[curvb.curprim.fge+2*s_bWriteDepth]; pd3dDevice->SetPixelShader(pps); BOOL bCanRenderStencil = 
g_bUpdateStencil && (curvb.prndr->psm&0xf) != 1 && !(curvb.frame.fbm&0x80000000); if( g_GameSettings & GAME_NOSTENCIL ) bCanRenderStencil = 0; if( s_bDestAlphaTest) { SETRS(D3DRS_STENCILENABLE, bCanRenderStencil); SETRS(D3DRS_STENCILPASS, D3DSTENCILOP_KEEP); SETRS(D3DRS_STENCILFUNC, D3DCMP_ALWAYS); } else SETRS(D3DRS_STENCILENABLE, 0); SETRS(D3DRS_ZWRITEENABLE, !curvb.zbuf.zmsk); SETRS(D3DRS_ZENABLE, curtest.zte); if( curtest.zte ) { if( curtest.ztst > 1 ) g_nDepthUsed = 2; if( (curtest.ztst == 2) ^ (g_nDepthBias != 0) ) { g_nDepthBias = curtest.ztst == 2; if( g_GameSettings & GAME_RELAXEDDEPTH ) SETRS(D3DRS_DEPTHBIAS, g_nDepthBias?FtoDW(0.00003f):FtoDW(0.0001f)); else SETRS(D3DRS_DEPTHBIAS, g_nDepthBias?FtoDW(0.0003f):FtoDW(0.000015f)); } SETRS(D3DRS_ZFUNC, g_dwZCmp[curtest.ztst]); // if( curtest.ztst == 3 ) { // // gequal // if( s_vznorm.y == 0 ) { // s_vznorm.y = 0.00001f; // SETCONSTF(GPU_ZNORM, s_vznorm); // } // } // else { // if( s_vznorm.y > 0 ) { // s_vznorm.y = 0; // SETCONSTF(GPU_ZNORM, s_vznorm); // } // } } SETRS(D3DRS_ALPHATESTENABLE, curtest.ate&&USEALPHATESTING); if( curtest.ate ) { if( curtest.atst == 7 && curtest.aref == 255 ) { // when it is at the very top, do a less than rather than not equal (gekibo2) SETRS(D3DRS_ALPHAFUNC, D3DCMP_LESS); SETRS(D3DRS_ALPHAREF, 255); } else { SETRS(D3DRS_ALPHAFUNC, g_dwAlphaCmp[curtest.atst]); SETRS(D3DRS_ALPHAREF, b2XAlphaTest ? 
min(255,2 * curtest.aref) : curtest.aref); } } if( s_bWriteDepth ) { //pd3dDevice->SetRenderTarget(0, curvb.prndr->psurf); //pd3dDevice->SetRenderTarget(1, !curvb.zbuf.zmsk?curvb.pdepth->psurf:NULL); if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, !curvb.zbuf.zmsk?0xf:0); else pd3dDevice->SetRenderTarget(1, !curvb.zbuf.zmsk?curvb.pdepth->psurf:NULL); } if( curvb.curprim.abe ) SetAlphaVariables(curvb.alpha); else SETRS(D3DRS_ALPHABLENDENABLE, 0); // needs to be before RenderAlphaTest if( curvb.fba.fba || s_bDestAlphaTest ) { if( gs.pabe || (curvb.fba.fba || bCanRenderStencil) && !(curvb.frame.fbm&0x80000000) ) { RenderFBA(curvb); } } u32 oldabe = curvb.curprim.abe; if( gs.pabe ) { //WARN_LOG("PBE!\n"); curvb.curprim.abe = 1; SETRS(D3DRS_ALPHABLENDENABLE, USEALPHABLENDING); } if( curvb.curprim.abe && bNeedAlphaColor ) { if( //bCanRenderStencil && (bNeedBlendFactorInAlpha || ((curtest.ate && curtest.atst>1) && (curtest.aref > 0x80))) ) { // need special stencil processing for the alpha RenderAlphaTest(curvb); dwUsingSpecialTesting = 1; } // harvest fishing DXVEC4 v = vAlphaBlendColor;// + DXVEC4(0,0,0,(curvb.test.atst==4 && curvb.test.aref>=128)?-0.004f:0); if( exactcolor ) { v.y *= 255; v.w *= 255; } SETCONSTF(GPU_ONECOLOR, v); } else { // not using blending so set to defaults DXVEC4 v = exactcolor ? 
DXVEC4(1, 510*255.0f/256.0f, 0, 0) : DXVEC4(1,2*255.0f/256.0f,0,0); SETCONSTF(GPU_ONECOLOR, v); } if( s_bDestAlphaTest && bCanRenderStencil ) { // if not 24bit and can write to high alpha bit RenderStencil(curvb, dwUsingSpecialTesting); } else { dwStencilRef = STENCIL_SPECIAL; dwStencilMask = STENCIL_SPECIAL; // setup the stencil to only accept the test pixels if( dwUsingSpecialTesting ) { SETRS(D3DRS_STENCILENABLE, TRUE); SETRS(D3DRS_STENCILWRITEMASK, STENCIL_PIXELWRITE); SETRS(D3DRS_STENCILMASK, STENCIL_SPECIAL); SETRS(D3DRS_STENCILPASS, D3DSTENCILOP_REPLACE); SETRS(D3DRS_STENCILFUNC, D3DCMP_EQUAL); SETRS(D3DRS_STENCILREF, STENCIL_SPECIAL|STENCIL_PIXELWRITE); } } #ifdef PCSX2_DEBUG if( bDestAlphaColor == 1 ) { WARN_LOG("dest alpha blending! manipulate alpha here\n"); } #endif if( bCanRenderStencil && gs.pabe ) { // only render the pixels with alpha values >= 0x80 SETRS(D3DRS_STENCILREF, dwStencilRef|STENCIL_FBA); SETRS(D3DRS_STENCILMASK, dwStencilMask|STENCIL_FBA); if( !dwStencilMask ) SETRS(D3DRS_STENCILFUNC, D3DCMP_EQUAL); } // curvb.prndr->SetViewport(); // pd3dDevice->SetScissorRect(&curvb.prndr->scissorrect); // SETRS(D3DRS_SCISSORTESTENABLE, TRUE); if( !curvb.test.ate || curvb.test.atst > 0 ) { DRAW(); } if( gs.pabe ) { // only render the pixels with alpha values < 0x80 SETRS(D3DRS_ALPHABLENDENABLE, 0); SETRS(D3DRS_STENCILREF, dwStencilRef); DXVEC4 v; v.x = 1; v.y = 2; v.z = 0; v.w = 0; if( exactcolor ) v.y *= 255; SETCONSTF(GPU_ONECOLOR, v); DRAW(); // reset SETRS(D3DRS_STENCILMASK, dwStencilMask); if( !dwStencilMask ) SETRS(D3DRS_STENCILFUNC, D3DCMP_ALWAYS); } // more work on alpha failure case if( curtest.ate && curtest.atst != 1 && curtest.afail > 0 ) { // need to reverse the test and disable some targets SETRS(D3DRS_ALPHAFUNC, g_dwReverseAlphaCmp[curtest.atst]); if( curtest.afail & 1 ) { // front buffer update only if( curtest.afail == 3 ) // disable alpha SETRS(D3DRS_COLORWRITEENABLE, 
D3DCOLORWRITEENABLE_BLUE|D3DCOLORWRITEENABLE_GREEN|D3DCOLORWRITEENABLE_RED); SETRS(D3DRS_ZWRITEENABLE, FALSE); if( s_bWriteDepth ) { if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, 0); else pd3dDevice->SetRenderTarget(1,NULL); } } else { // zbuffer update only SETRS(D3DRS_COLORWRITEENABLE, 0); } if( gs.pabe && bCanRenderStencil ) { // only render the pixels with alpha values >= 0x80 DXVEC4 v = vAlphaBlendColor; if( exactcolor ) { v.y *= 255; v.w *= 255; } SETCONSTF(GPU_ONECOLOR, v); SETRS(D3DRS_ALPHABLENDENABLE, USEALPHABLENDING); SETRS(D3DRS_STENCILREF, dwStencilRef|STENCIL_FBA); SETRS(D3DRS_STENCILMASK, dwStencilMask|STENCIL_FBA); if( !dwStencilMask ) SETRS(D3DRS_STENCILFUNC, D3DCMP_EQUAL); } // setup the stencil to only accept the test pixels if( dwUsingSpecialTesting ) { if( !s_bDestAlphaTest || !bCanRenderStencil ) { SETRS(D3DRS_STENCILENABLE, FALSE); } } // IDirect3DQuery9* pOcclusionQuery; // DWORD numberOfPixelsDrawn; // // pd3dDevice->CreateQuery(D3DQUERYTYPE_OCCLUSION, &pOcclusionQuery); // // // Add an end marker to the command buffer queue. // pOcclusionQuery->Issue(D3DISSUE_BEGIN); DRAW(); // pOcclusionQuery->Issue(D3DISSUE_END); // Force the driver to execute the commands from the command buffer. // Empty the command buffer and wait until the GPU is idle. 
// while(S_FALSE == pOcclusionQuery->GetData( &numberOfPixelsDrawn, sizeof(DWORD), D3DGETDATA_FLUSH )); // SAFE_RELEASE(pOcclusionQuery); if( gs.pabe ) { // only render the pixels with alpha values < 0x80 SETRS(D3DRS_ALPHABLENDENABLE, 0); SETRS(D3DRS_STENCILREF, dwStencilRef); DXVEC4 v; v.x = 1; v.y = 2; v.z = 0; v.w = 0; if( exactcolor ) v.y *= 255; SETCONSTF(GPU_ONECOLOR, v); DRAW(); // reset SETRS(D3DRS_STENCILMASK, dwStencilMask); SETRS(D3DRS_ALPHABLENDENABLE, oldabe); if( !dwStencilMask ) SETRS(D3DRS_STENCILFUNC, D3DCMP_ALWAYS); } // restore if( (curtest.afail & 1) && !curvb.zbuf.zmsk ) { SETRS(D3DRS_ZWRITEENABLE, TRUE); if( s_bWriteDepth ) { assert( curvb.pdepth != NULL); if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, 0xf); else pd3dDevice->SetRenderTarget(1,curvb.pdepth->psurf); } } SETRS(D3DRS_COLORWRITEENABLE, s_dwColorWrite); // not needed anymore since rest of ops concentrate on image processing //SETRS(D3DRS_ALPHAFUNC, g_dwAlphaCmp[curtest.atst]); } if( dwUsingSpecialTesting ) { // render the real alpha SETRS(D3DRS_ALPHATESTENABLE, FALSE); SETRS(D3DRS_COLORWRITEENABLE, D3DCOLORWRITEENABLE_ALPHA); if( s_bWriteDepth ) { if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, 0); else pd3dDevice->SetRenderTarget(1,NULL); } SETRS(D3DRS_ZWRITEENABLE, FALSE); SETRS(D3DRS_STENCILMASK, STENCIL_SPECIAL|STENCIL_PIXELWRITE); SETRS(D3DRS_STENCILPASS, D3DSTENCILOP_KEEP); SETRS(D3DRS_STENCILFUNC, D3DCMP_EQUAL); SETRS(D3DRS_STENCILREF, STENCIL_SPECIAL|STENCIL_PIXELWRITE); DXVEC4 v = DXVEC4(0,exactcolor ? 
510.0f : 2.0f,0,0); SETCONSTF(GPU_ONECOLOR, v); DRAW(); // don't need to restore } if( s_bDestAlphaTest ) { if( (s_dwColorWrite&D3DCOLORWRITEENABLE_ALPHA) ) { if( curvb.fba.fba ) ProcessFBA(curvb); else if( bCanRenderStencil ) // finally make sure all entries are 1 when the dest alpha >= 0x80 (if fba is 1, this is already the case) ProcessStencil(curvb); } } else if( (s_dwColorWrite&D3DCOLORWRITEENABLE_ALPHA) && curvb.fba.fba ) ProcessFBA(curvb); if( bDestAlphaColor == 1 ) { // need to reset the dest colors to their original counter parts //WARN_LOG("Need to reset dest alpha color\n"); } #ifdef PCSX2_DEBUG if( g_bSaveFlushedFrame & 0xf ) { char str[255]; sprintf(str, "frames\\frame%.4d.jpg", g_SaveFrameNum++); if( (g_bSaveFlushedFrame & 2) ) D3DXSaveSurfaceToFile(str, D3DXIFF_JPG, curvb.prndr->psurf, NULL, NULL); } #endif // clamp the final colors, when enabled ffx2 credits mess up if( curvb.curprim.abe && bAlphaClamping && g_RenderFormat != D3DFMT_A8R8G8B8 && !(g_GameSettings&GAME_NOCOLORCLAMP) ) { // if !colclamp, skip ResetAlphaVariables(); // if processing the clamping case, make sure can write to the front buffer SETRS(D3DRS_STENCILENABLE, 0); SETRS(D3DRS_ALPHABLENDENABLE, TRUE); SETRS(D3DRS_ALPHATESTENABLE, FALSE); SETRS(D3DRS_ZENABLE, FALSE); SETRS(D3DRS_ZWRITEENABLE, FALSE); SETRS(D3DRS_COLORWRITEENABLE, D3DCOLORWRITEENABLE_RED|D3DCOLORWRITEENABLE_BLUE|D3DCOLORWRITEENABLE_GREEN); if( s_bWriteDepth ) { if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, 0); else pd3dDevice->SetRenderTarget(1,NULL); } pd3dDevice->SetPixelShader(ppsOne); // (dest&0x7f)+0x80, blend factor for alpha is always 0x80 SETRS(D3DRS_DESTBLEND, D3DBLEND_ONE); SETRS(D3DRS_SRCBLEND, D3DBLEND_ONE); float f; if( bAlphaClamping & 1 ) { // min f = 0; SETCONSTF(GPU_ONECOLOR, &f); SETRS(D3DRS_BLENDOP, D3DBLENDOP_MAX); DRAW(); } // bios shows white screen if( bAlphaClamping & 2 ) { // max f = 1; SETCONSTF(GPU_ONECOLOR, &f); SETRS(D3DRS_BLENDOP, D3DBLENDOP_MIN); DRAW(); } if( !curvb.zbuf.zmsk ) 
{ SETRS(D3DRS_ZWRITEENABLE, TRUE); if( s_bWriteDepth ) { assert( curvb.pdepth != NULL ); if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, 0xf); else pd3dDevice->SetRenderTarget(1,curvb.pdepth->psurf); } } if( curvb.test.ate && USEALPHATESTING ) SETRS(D3DRS_ALPHATESTENABLE, TRUE); SETRS(D3DRS_ZENABLE, curtest.zte); } if( dwFilterOpts ) { // undo filter changes if( dwFilterOpts & 1 ) pd3dDevice->SetSamplerState(SAMP_MEMORY0+context, D3DSAMP_MAGFILTER, D3DTEXF_POINT); if( dwFilterOpts & 2 ) pd3dDevice->SetSamplerState(SAMP_MEMORY0+context, D3DSAMP_MINFILTER, D3DTEXF_POINT); } // reset used textures if( shadertype > 2 ) { pd3dDevice->SetSamplerState(SAMP_BILINEARBLOCKS, D3DSAMP_ADDRESSU, D3DTADDRESS_WRAP); pd3dDevice->SetSamplerState(SAMP_BILINEARBLOCKS, D3DSAMP_ADDRESSV, D3DTADDRESS_WRAP); pd3dDevice->SetTexture(SAMP_BILINEARBLOCKS, ptexBilinearBlocks); } SETRS(D3DRS_CLIPPLANEENABLE, 0); //#ifndef RELEASE_TO_PUBLIC ppf += curvb.dwCount+0x100000; //#endif curvb.dwCurOff += POINT_BUFFERFLUSH; SAFE_RELEASE(ptexRenderTargetCached); g_MaxRenderedHeight = 0; curvb.dwCount = 0; //curvb.Lock(); curvb.curprim.abe = oldabe; //if( oldabe ) SETRS(D3DRS_ALPHABLENDENABLE, USEALPHABLENDING); if( conf.options & GSOPTION_WIREFRAME ) { // always render first few geometry as solid if( s_nWireframeCount > 0 ) { SETRS(D3DRS_FILLMODE, D3DFILL_WIREFRAME); --s_nWireframeCount; } } } void ZeroGS::ProcessMessages() { if( listMsgs.size() > 0 ) { pSprite->Begin(D3DXSPRITE_ALPHABLEND|D3DXSPRITE_SORT_TEXTURE); RECT rctext; rctext.left = 25; rctext.top = 15; list::iterator it = listMsgs.begin(); while( it != listMsgs.end() ) { rctext.left += 1; rctext.top += 1; pFont->DrawText(pSprite, it->str, -1, &rctext, DT_LEFT|DT_NOCLIP, 0xff000000); rctext.left -= 1; rctext.top -= 1; pFont->DrawText(pSprite, it->str, -1, &rctext, DT_LEFT|DT_NOCLIP, 0xffffff30); rctext.top += 15; if( (int)(it->dwTimeStamp - timeGetTime()) < 0 ) it = listMsgs.erase(it); else ++it; } pSprite->End(); } } void 
ZeroGS::RenderCustom(float fAlpha) { if( !s_bBeginScene ) pd3dDevice->BeginScene(); pd3dDevice->SetDepthStencilSurface(psurfOrgDepth); pd3dDevice->SetRenderTarget(0, psurfOrgTarg); if( s_bWriteDepth ) pd3dDevice->SetRenderTarget(1, NULL); SETRS(D3DRS_STENCILENABLE, 0); SETRS(D3DRS_ZENABLE, FALSE); SETRS(D3DRS_ZWRITEENABLE, FALSE); SETRS(D3DRS_COLORWRITEENABLE, 0xf); SETRS(D3DRS_ALPHABLENDENABLE, 0); SETRS(D3DRS_ALPHATESTENABLE, 0); SETRS(D3DRS_SCISSORTESTENABLE, 0); // play custom animation pd3dDevice->Clear(0, NULL, D3DCLEAR_TARGET|D3DCLEAR_ZBUFFER|D3DCLEAR_STENCIL, 0, 1, 0); // tex coords DXVEC4 v = DXVEC4(1, 1, 0, 0); SETCONSTF(GPU_BITBLTTEX, v); SETCONSTF(GPU_BITBLTPOS, v); v.x = v.y = v.z = v.w = fAlpha; SETCONSTF(GPU_ONECOLOR, v); if( conf.options & GSOPTION_WIREFRAME ) SETRS(D3DRS_FILLMODE, D3DFILL_SOLID); pd3dDevice->SetVertexShader(pvsBitBlt); pd3dDevice->SetStreamSource(0, pvbRect, 0, sizeof(VertexGPU)); pd3dDevice->SetPixelShader(ppsBaseTexture); // inside vb[0]'s target area, so render that region only pd3dDevice->SetTexture(SAMP_FINAL, ptexLogo); //pd3dDevice->SetSamplerState(SAMP_FINAL, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); //pd3dDevice->SetSamplerState(SAMP_FINAL, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); pd3dDevice->DrawPrimitive(D3DPT_TRIANGLESTRIP, 0, 2); // restore //pd3dDevice->SetSamplerState(SAMP_FINAL, D3DSAMP_MAGFILTER, D3DTEXF_POINT); //pd3dDevice->SetSamplerState(SAMP_FINAL, D3DSAMP_MINFILTER, D3DTEXF_POINT); if( conf.options & GSOPTION_WIREFRAME ) SETRS(D3DRS_FILLMODE, D3DFILL_WIREFRAME); ProcessMessages(); pd3dDevice->EndScene(); s_bBeginScene = FALSE; pd3dDevice->Present(NULL, NULL, NULL, NULL); SETRS(D3DRS_SCISSORTESTENABLE, TRUE); SETRS(D3DRS_STENCILENABLE, 1); if( icurctx >= 0 ) vb[icurctx].bSyncVars = 0; } // adjusts trans to preserve aspect ratio void ZeroGS::AdjustTransToAspect(DXVEC4& v, int dispwidth, int dispheight) { float temp, f; if( dispwidth * height > dispheight * width ) { // limited by width // change in ratio f = ((float)width 
/ (float)dispwidth) / ((float)height / (float)dispheight); v.y *= f; v.w *= f; // scanlines mess up when not aligned right v.y += (1-modf(v.y*height/2+0.05f, &temp))*2.0f/(float)height; v.w += (1-modf(v.w*height/2+0.05f, &temp))*2.0f/(float)height; } else { // limited by height f = ((float)height / (float)dispheight) / ((float)width / (float)dispwidth); f -= modf(f*width, &temp)/(float)width; v.x *= f; v.z *= f; } } void ZeroGS::Restore() { if( !g_bIsLost ) return; if( SUCCEEDED(pd3dDevice->Reset(&d3dpp)) ) { g_bIsLost = 0; // handle lost states ZeroGS::ChangeDeviceSize(width, height); } } void ZeroGS::RenderCRTC(int interlace) { if( pd3dDevice == NULL ) { return; } if( g_bIsLost ) return; #ifdef RELEASE_TO_PUBLIC if( g_nRealFrame < 80 ) { RenderCustom( min(1.0f, 2.0f - (float)g_nRealFrame / 40.0f) ); if( g_nRealFrame == 79 ) SAFE_RELEASE(ptexLogo); return; } #endif Flush(0); Flush(1); // frame skipping if( g_nFrameRender > 0 ) { if( g_nFrameRender < 8 ) { g_nFrameRender++; if( g_nFrameRender <= 3 ) { g_nFramesSkipped++; return; } } } else { if( g_nFrameRender < -1 ) { g_nFramesSkipped++; return; } g_nFrameRender--; } if( g_bSaveFrame ) { if( vb[0].prndr != NULL ) D3DXSaveSurfaceToFile("frame1.tga", D3DXIFF_TGA, vb[0].prndr->psurf, NULL, NULL); if( vb[1].prndr != NULL && vb[0].prndr != vb[1].prndr ) D3DXSaveSurfaceToFile("frame2.tga", D3DXIFF_TGA, vb[1].prndr->psurf, NULL, NULL); else DeleteFile("frame2.tga"); } if( s_RangeMngr.ranges.size() > 0 ) FlushTransferRanges(NULL); if( icurctx >= 0 && vb[icurctx].bVarsSetTarg ) { // check if anything rendered pd3dDevice->SetRenderTarget(0, psurfOrgTarg); pd3dDevice->SetRenderTarget(1, NULL); pd3dDevice->SetDepthStencilSurface(psurfOrgDepth); } D3DVIEWPORT9 view; view.Width = width; view.Height = height; view.X = 0; view.Y = 0; view.MinZ = 0; view.MaxZ = 1.0f; pd3dDevice->SetViewport(&view); //g_GameSettings |= GAME_VSSHACK|GAME_FULL16BITRES|GAME_NODEPTHRESOLVE; //s_bWriteDepth = TRUE; g_SaveFrameNum = 0; 
g_bSaveFlushedFrame = 1; // static int counter = 0; // counter++; // reset fba after every frame //if( !(g_GameSettings&GAME_NOFBARESET) ) { vb[0].fba.fba = 0; vb[1].fba.fba = 0; //} u32 bInterlace = SMODE2->INT && SMODE2->FFMD && (conf.interlace<2); // if interlace, only clear every other vsync if(!bInterlace ) { u32 color = D3DCOLOR_ARGB(0, BGCOLOR->R, BGCOLOR->G, BGCOLOR->B); pd3dDevice->Clear(0, NULL, D3DCLEAR_TARGET|D3DCLEAR_STENCIL, color, 1, 0); } if( !s_bBeginScene ) { pd3dDevice->BeginScene(); s_bBeginScene = TRUE; } pd3dDevice->SetVertexShader(pvsBitBlt); pd3dDevice->SetStreamSource(0, pvbRect, 0, sizeof(VertexGPU)); if( conf.options & GSOPTION_WIREFRAME ) SETRS(D3DRS_FILLMODE, D3DFILL_SOLID); SETRS(D3DRS_ZENABLE, 0); SETRS(D3DRS_ZWRITEENABLE, 0); SETRS(D3DRS_COLORWRITEENABLE, 0xf); SETRS(D3DRS_ALPHABLENDENABLE, 0); SETRS(D3DRS_ALPHATESTENABLE, 0); SETRS(D3DRS_SCISSORTESTENABLE, 0); SETRS(D3DRS_STENCILENABLE, 0); BOOL bUsingStencil = 0; if( bInterlace ) g_PrevBitwiseTexX = -1; // reset since will be using tex0Info dispinfo[2]; for(int i = 0; i < 2; ++i) { if( !(*(u32*)(PMODE) & (1<MAGH+1; int magv = pd->MAGV+1; dispinfo[i].tbp0 = pfb->FBP << 5; dispinfo[i].tbw = pfb->FBW << 6; dispinfo[i].tw = (pd->DW + 1) / magh; dispinfo[i].th = (pd->DH + 1) / magv; dispinfo[i].psm = pfb->PSM; // hack!! // 2 * dispinfo[i].tw / dispinfo[i].th <= 1, metal slug 4 if( bInterlace && 2 * dispinfo[i].tw / dispinfo[i].th <= 1 && !(g_GameSettings&GAME_INTERLACE2X) ) { dispinfo[i].th >>= 1; } } //int dispwidth = max(dispinfo[0].tw, dispinfo[1].tw), dispheight = max(dispinfo[0].th, dispinfo[1].th); // hack!, CMOD != 3, gradius // if( SMODE2->INT && SMODE2->FFMD && SMODE1->CMOD == 3 && dispwidth <= 320) // dispwidth *= 2; // hack! makai //if( !bInterlace && dispheight * 2 < dispwidth ) dispheight *= 2; // start from the last circuit for(int i = !PMODE->SLBG; i >= 0; --i) { tex0Info& texframe = dispinfo[i]; if( texframe.th <= 1 ) continue; GSRegDISPFB* pfb = i ? 
DISPFB2 : DISPFB1; GSRegDISPLAY* pd = i ? DISPLAY2 : DISPLAY1; DXVEC4 v, valpha; if( bInterlace ) { texframe.th >>= 1; // interlace mode pd3dDevice->SetTexture(SAMP_INTERLACE, CreateInterlaceTex(2*texframe.th)); if( interlace == (conf.interlace&1) ) { // pass if odd valpha.z = 1.0f; valpha.w = -0.4999f; } else { // pass if even valpha.z = -1.0f; valpha.w = 0.5001f; } } else { if( SMODE2->INT && SMODE2->FFMD ) { texframe.th >>= 1; } // always pass interlace test valpha.z = 0; valpha.w = 1; } int bpp = 4; if( texframe.psm == 0x12 ) bpp = 3; else if( texframe.psm & 2 ) bpp = 2; // get the start and end addresses of the buffer int start, end; GetRectMemAddress(start, end, texframe.psm, 0, 0, texframe.tw, texframe.th, texframe.tbp0, texframe.tbw); if( i == 0 ) { // setup right blending SETRS(D3DRS_ALPHABLENDENABLE, USEALPHABLENDING); SETRS(D3DRS_BLENDOP, D3DBLENDOP_ADD); SETRS(D3DRS_BLENDOPALPHA, D3DBLENDOP_ADD); if( PMODE->MMOD ) { SETRS(D3DRS_BLENDFACTOR, D3DCOLOR_ARGB(0x80, PMODE->ALP, PMODE->ALP, PMODE->ALP)); SETRS(D3DRS_SRCBLEND, D3DBLEND_BLENDFACTOR); SETRS(D3DRS_DESTBLEND, D3DBLEND_INVBLENDFACTOR); } else { SETRS(D3DRS_SRCBLEND, D3DBLEND_SRCALPHA); SETRS(D3DRS_DESTBLEND, D3DBLEND_INVSRCALPHA); } SETRS(D3DRS_SRCBLENDALPHA, PMODE->AMOD ? D3DBLEND_ZERO : D3DBLEND_ONE); SETRS(D3DRS_DESTBLENDALPHA, PMODE->AMOD? 
D3DBLEND_ONE : D3DBLEND_ZERO); } if( bUsingStencil ) { SETRS(D3DRS_STENCILWRITEMASK, 1<SetPixelShader(ppsCRTC24[bInterlace]); valpha.x = 0; valpha.y = 1; SETCONSTF(GPU_ONECOLOR, valpha); pd3dDevice->DrawPrimitive(D3DPT_TRIANGLESTRIP, 0, 2); continue; } // first render the current render targets, then from ptexMem if( texframe.psm == 1 ) { valpha.x = 0; valpha.y = 1; } else { valpha.x = 1; valpha.y = 0; } SETCONSTF(GPU_ONECOLOR, valpha); BOOL bSkip = 0; BOOL bResolveTargs = 1; //s_mapFrameHeights[s_nCurFrameMap][texframe.tbp0] = texframe.th; list listTargs; s_RTs.GetTargs(start, end, listTargs); for(list::iterator it = listTargs.begin(); it != listTargs.end(); ) { CRenderTarget* ptarg = *it; if( ptarg->fbw == texframe.tbw && !(ptarg->status&CRenderTarget::TS_NeedUpdate) && ((256/bpp)*(texframe.tbp0-ptarg->fbp))%texframe.tbw == 0 ) { if( ptarg->fbp != texframe.tbp0 ) { // look for a better target (metal slug 5) list::iterator itbetter; for(itbetter = listTargs.begin(); itbetter != listTargs.end(); ++itbetter ) { if( (*itbetter)->fbp == texframe.tbp0 ) break; } if( itbetter != listTargs.end() ) { it = listTargs.erase(it); continue; } } static int sindex = 0; char strtemp[25]; sprintf(strtemp, "frames/frame%d.jpg", sindex++); // D3DXSaveSurfaceToFile(strtemp, D3DXIFF_JPG, ptarg->psurf, NULL, NULL); // if( g_bSaveFinalFrame ) // D3DXSaveSurfaceToFile("frame1.tga", D3DXIFF_TGA, ptarg->psurf, NULL, NULL); int dby = pfb->DBY; int movy = 0; // determine the rectangle to render if( ptarg->fbp < texframe.tbp0 ) { dby += (256/bpp)*(texframe.tbp0-ptarg->fbp)/texframe.tbw; } else if( ptarg->fbp > texframe.tbp0 ) { dby -= (256/bpp)*(ptarg->fbp-texframe.tbp0)/texframe.tbw; if( dby < 0 ) { movy = -dby; dby = 0; } } int dh = min(ptarg->fbh - dby, texframe.th-movy); if( dh >= 64 ) { if( ptarg->fbh - dby < texframe.th-movy && !bUsingStencil ) { pd3dDevice->Clear(0, NULL, D3DCLEAR_STENCIL, 0, 1, 0); bUsingStencil = 1; SETRS(D3DRS_STENCILENABLE, TRUE); SETRS(D3DRS_STENCILPASS, 
D3DSTENCILOP_REPLACE); SETRS(D3DRS_STENCILFUNC, D3DCMP_NOTEQUAL); SETRS(D3DRS_STENCILREF, 3); SETRS(D3DRS_STENCILWRITEMASK, 1<fbh; // tex coords v = DXVEC4(fiw*(float)texframe.tw, fih*(float)(dh), fiw*(float)(pfb->DBX), fih*((float)dby-0.5f)); SETCONSTF(GPU_BITBLTTEX, v); // dest rect v.x = 1; v.y = dh/(float)texframe.th; v.z = 0; v.w = 1-v.y; if( movy > 0 ) v.w -= movy/(float)texframe.th; if (bInterlace && interlace == (conf.interlace&1) ) { // move down by 1 pixel v.w += 1.0f / (float)dh; } AdjustTransToAspect(v, (conf.options&GSOPTION_WIDESCREEN)?960:640, (conf.options&GSOPTION_WIDESCREEN)?540:480); SETCONSTF(GPU_BITBLTPOS, v); // use GPU_INVTEXDIMS to store inverse texture dims v.x = fiw; v.y = fih; v.z = 0; SETCONSTF(GPU_INVTEXDIMS, v); // inside vb[0]'s target area, so render that region only pd3dDevice->SetTexture(SAMP_FINAL, ptarg->ptex); pd3dDevice->SetPixelShader(ppsCRTCTarg[bInterlace]); pd3dDevice->DrawPrimitive(D3DPT_TRIANGLESTRIP, 0, 2); if( abs(dh - (int)texframe.th) <= 1 ) { bSkip = 1; break; } if( abs(dh - (int)ptarg->fbh) <= 1 ) { it = listTargs.erase(it); continue; } } } ++it; } if( !bSkip ) { for(list::iterator it = listTargs.begin(); it != listTargs.end(); ++it) (*it)->Resolve(); // context has to be 0 SetTexVariablesInt(0, 2, texframe, g_MemTargs.GetMemoryTarget(texframe, 1), 1); if( g_bSaveFinalFrame ) SaveTex(&texframe, g_bSaveFinalFrame-1>0); // finally render from the memory (note that the stencil buffer will keep previous regions) v = DXVEC4(1,1,0,0); if (bInterlace && interlace == (conf.interlace)) { // move down by 1 pixel v.w += 1.0f / (float)texframe.th; } AdjustTransToAspect(v, (conf.options&GSOPTION_WIDESCREEN)?960:640, (conf.options&GSOPTION_WIDESCREEN)?540:480); SETCONSTF(GPU_BITBLTPOS, v); v = DXVEC4(texframe.tw,texframe.th,-0.5f,-0.5f); SETCONSTF(GPU_BITBLTTEX, v); // use GPU_INVTEXDIMS to store inverse texture dims v.x = 1.0f / (float)texframe.tw; v.y = 1.0f / (float)texframe.th; v.z = 0;//-0.5f * v.x; v.w = -0.5f * v.y; 
SETCONSTF(GPU_INVTEXDIMS, v); pd3dDevice->SetPixelShader(ppsCRTC[bInterlace]); pd3dDevice->DrawPrimitive(D3DPT_TRIANGLESTRIP, 0, 2); } } if(1) {// || !bInterlace) { s_bBeginScene = FALSE; ProcessMessages(); if( g_bMakeSnapshot ) { RECT rctext; char str[64]; rctext.left = 200; rctext.top = 15; sprintf(str, "ZeroGS %d.%d.%d - %.1f fps %s", revision, build, minor, fFPS, s_frameskipping?" - frameskipping":""); pSprite->Begin(D3DXSPRITE_ALPHABLEND|D3DXSPRITE_SORT_TEXTURE); rctext.left += 1; rctext.top += 1; pFont->DrawText(pSprite, str, -1, &rctext, DT_LEFT|DT_NOCLIP, 0xff000000); rctext.left -= 1; rctext.top -= 1; pFont->DrawText(pSprite, str, -1, &rctext, DT_LEFT|DT_NOCLIP, 0xffc0ffff); pSprite->End(); } if( g_bDisplayFPS ) { RECT rctext; char str[64]; rctext.left = 10; rctext.top = 10; sprintf(str, "%.1f fps", fFPS); pSprite->Begin(D3DXSPRITE_ALPHABLEND|D3DXSPRITE_SORT_TEXTURE); rctext.left += 1; rctext.top += 1; pFont->DrawText(pSprite, str, -1, &rctext, DT_LEFT|DT_NOCLIP, 0xff000000); rctext.left -= 1; rctext.top -= 1; pFont->DrawText(pSprite, str, -1, &rctext, DT_LEFT|DT_NOCLIP, 0xffc0ffff); pSprite->End(); } pd3dDevice->EndScene(); if( pd3dDevice->Present(NULL, NULL, NULL, NULL) == D3DERR_DEVICELOST ) { // device is lost, need to recreate DEBUG_LOG("device lost\n"); g_bIsLost = TRUE; Reset(); return; } if( conf.options & GSOPTION_WIREFRAME ) { // clear all targets s_nWireframeCount = 1; } if( g_bMakeSnapshot ) { if( SUCCEEDED(D3DXSaveSurfaceToFile(strSnapshot != ""?strSnapshot.c_str():"temp.jpg", (conf.options&GSOPTION_BMPSNAP)?D3DXIFF_BMP:D3DXIFF_JPG, psurfOrgTarg, NULL, NULL)) ) { char str[255]; sprintf(str, "saved %s\n", strSnapshot.c_str()); AddMessage(str, 500); } g_bMakeSnapshot = 0; } if( s_avicapturing ) { CaptureFrame(); } if( s_nNewWidth >= 0 && s_nNewHeight >= 0 && !g_bIsLost ) { Reset(); ChangeDeviceSize(s_nNewWidth, s_nNewHeight); s_nNewWidth = s_nNewHeight = -1; } // switch the fbp lists // s_nCurFBPSet ^= 1; // s_setFBP[s_nCurFBPSet].clear(); 
//s_nCurFrameMap ^= 1; //s_mapFrameHeights[s_nCurFrameMap].clear(); } pd3dDevice->SetTexture(SAMP_FINAL, NULL); // d3d debug complains if not g_MemTargs.DestroyCleared(); for(list::iterator it = s_vecTempTextures.begin(); it != s_vecTempTextures.end(); ++it) (*it)->Release(); s_vecTempTextures.clear(); if( EXTWRITE->WRITE&1 ) { WARN_LOG("EXTWRITE\n"); ExtWrite(); EXTWRITE->WRITE = 0; } if( conf.options & GSOPTION_WIREFRAME ) SETRS(D3DRS_FILLMODE, D3DFILL_WIREFRAME); SETRS(D3DRS_SCISSORTESTENABLE, TRUE); if( icurctx >= 0 ) { vb[icurctx].bVarsSetTarg = FALSE; vb[icurctx].bVarsTexSync = FALSE; vb[0].bVarsTexSync = FALSE; } // statistics if( s_nWriteDepthCount > 0 ) { assert( conf.mrtdepth ); if( --s_nWriteDepthCount <= 0 ) { s_bWriteDepth = FALSE; } } if( s_nWriteDestAlphaTest > 0 ) { if( --s_nWriteDestAlphaTest <= 0 ) { s_bDestAlphaTest = FALSE; } } if( g_GameSettings & GAME_AUTORESET ) { s_nResolveCounts[s_nCurResolveIndex] = s_nResolved; s_nCurResolveIndex = (s_nCurResolveIndex+1)%ArraySize(s_nResolveCounts); int total = 0; for(int i = 0; i < ArraySize(s_nResolveCounts); ++i) total += s_nResolveCounts[i]; if( total / ArraySize(s_nResolveCounts) > 3 ) { if( s_nLastResolveReset > (int)(fFPS * 8) ) { // reset DEBUG_LOG("ZeroGS: video mem reset\n"); s_nLastResolveReset = 0; memset(s_nResolveCounts, 0, sizeof(s_nResolveCounts)); s_RTs.ResolveAll(); s_RTs.Destroy(); s_DepthRTs.ResolveAll(); s_DepthRTs.Destroy(); vb[0].prndr = NULL; vb[0].pdepth = NULL; vb[0].bNeedFrameCheck = 1; vb[0].bNeedZCheck = 1; vb[1].prndr = NULL; vb[1].pdepth = NULL; vb[1].bNeedFrameCheck = 1; vb[1].bNeedZCheck = 1; } } s_nLastResolveReset++; } if( s_nResolved > 8 ) s_nResolved = 2; else if( s_nResolved > 0 ) --s_nResolved; if( g_nDepthUsed > 0 ) --g_nDepthUsed; s_ClutResolve = 0; s_PSM8Resolve = 0; g_nDepthUpdateCount = 0; maxmin = 608; } ////////////////////////// // Internal Definitions // ////////////////////////// __forceinline void MOVZ(VertexGPU *p, u32 gsz, const VB& curvb) { p->z = 
curvb.zprimmask==0xffff?min((u32)0xffff, gsz):gsz; } __forceinline void MOVFOG(VertexGPU *p, Vertex gsf) { p->f = ((s16)(gsf).f<<7)|0x7f; } __forceinline void SET_VERTEX(VertexGPU *p, int Index, const VB& curvb) { int index = Index; p->x = (((int)gs.gsvertex[index].x - curvb.offset.x)>>1)&0xffff; p->y = (((int)gs.gsvertex[index].y - curvb.offset.y)>>1)&0xffff; /*x = ((int)gs.gsvertex[index].x - curvb.offset.x); y = ((int)gs.gsvertex[index].y - curvb.offset.y); p.x = (x&0x7fff) | (x < 0 ? 0x8000 : 0); p.y = (y&0x7fff) | (y < 0 ? 0x8000 : 0);*/ p->f = ((s16)gs.gsvertex[index].f<<7)|0x7f; MOVZ(p, gs.gsvertex[index].z, curvb); p->rgba = prim->iip ? gs.gsvertex[index].rgba : gs.rgba; if ((g_GameSettings & GAME_TEXAHACK) && !(p->rgba&0xffffff)) p->rgba = 0; if (prim->tme ) { if( prim->fst ) { p->s = (float)gs.gsvertex[index].u * fiTexWidth[prim->ctxt]; p->t = (float)gs.gsvertex[index].v * fiTexHeight[prim->ctxt]; p->q = 1; } else { p->s = gs.gsvertex[index].s; p->t = gs.gsvertex[index].t; p->q = gs.gsvertex[index].q; } } } #define OUTPUT_VERT(fn, vert, id) { \ fn("%c%d(%d): xyzf=(%4d,%4d,0x%x,%3d), rgba=0x%8.8x, stq = (%2.5f,%2.5f,%2.5f)\n", id==0?'*':' ', id, prim->prim, vert.x/8, vert.y/8, vert.z, vert.f/128, \ vert.rgba, Clamp(vert.s, -10, 10), Clamp(vert.t, -10, 10), Clamp(vert.q, -10, 10)); \ } \ void ZeroGS::KickPoint() { assert( gs.primC >= 1 ); VB& curvb = vb[prim->ctxt]; if (curvb.bNeedTexCheck) curvb.FlushTexData(); if( !(g_GameSettings&GAME_DOPARALLELCTX) && vb[!prim->ctxt].dwCount > 0 && vb[prim->ctxt].gsfb.fbp == vb[!prim->ctxt].gsfb.fbp ) { assert( vb[prim->ctxt].dwCount == 0 ); Flush(!prim->ctxt); } if( curvb.dwCount >= POINT_BUFFERFLUSH) Flush(prim->ctxt); curvb.Lock(); int last = (gs.primIndex+2)%ArraySize(gs.gsvertex); VertexGPU* p = curvb.pbuf+curvb.dwCount; SET_VERTEX(&p[0], last, curvb); curvb.dwCount++; #ifdef PRIM_LOG OUTPUT_VERT(PRIM_LOG, p[0], 0); #endif } void ZeroGS::KickLine() { assert( gs.primC >= 2 ); VB& curvb = vb[prim->ctxt]; if( 
curvb.bNeedTexCheck ) curvb.FlushTexData(); if( !(g_GameSettings&GAME_DOPARALLELCTX) && vb[!prim->ctxt].dwCount > 0 && vb[prim->ctxt].gsfb.fbp == vb[!prim->ctxt].gsfb.fbp ) { assert( vb[prim->ctxt].dwCount == 0 ); Flush(!prim->ctxt); } if( curvb.dwCount >= POINT_BUFFERFLUSH/2 ) Flush(prim->ctxt); curvb.Lock(); int next = (gs.primIndex+1)%ArraySize(gs.gsvertex); int last = (gs.primIndex+2)%ArraySize(gs.gsvertex); VertexGPU* p = curvb.pbuf+curvb.dwCount*2; SET_VERTEX(&p[0], next, curvb); SET_VERTEX(&p[1], last, curvb); curvb.dwCount++; #ifdef PRIM_LOG OUTPUT_VERT(PRIM_LOG, p[0], 0); OUTPUT_VERT(PRIM_LOG, p[1], 1); #endif } void ZeroGS::KickTriangle() { assert( gs.primC >= 3 ); VB& curvb = vb[prim->ctxt]; if (curvb.bNeedTexCheck) curvb.FlushTexData(); if( !(g_GameSettings&GAME_DOPARALLELCTX) && vb[!prim->ctxt].dwCount > 0 && vb[prim->ctxt].gsfb.fbp == vb[!prim->ctxt].gsfb.fbp ) { assert( vb[prim->ctxt].dwCount == 0 ); Flush(!prim->ctxt); } if( curvb.dwCount >= POINT_BUFFERFLUSH/3 ) Flush(prim->ctxt); curvb.Lock(); VertexGPU* p = curvb.pbuf+curvb.dwCount*3; SET_VERTEX(&p[0], 0, curvb); SET_VERTEX(&p[1], 1, curvb); SET_VERTEX(&p[2], 2, curvb); curvb.dwCount++; #ifdef PRIM_LOG OUTPUT_VERT(PRIM_LOG, p[0], 0); OUTPUT_VERT(PRIM_LOG, p[1], 1); OUTPUT_VERT(PRIM_LOG, p[2], 2); #endif } void ZeroGS::KickTriangleFan() { assert( gs.primC >= 3 ); VB& curvb = vb[prim->ctxt]; if (curvb.bNeedTexCheck) curvb.FlushTexData(); if( !(g_GameSettings&GAME_DOPARALLELCTX) && vb[!prim->ctxt].dwCount > 0 && vb[prim->ctxt].gsfb.fbp == vb[!prim->ctxt].gsfb.fbp ) { assert( vb[prim->ctxt].dwCount == 0 ); Flush(!prim->ctxt); } if( curvb.dwCount >= POINT_BUFFERFLUSH/3 ) Flush(prim->ctxt); curvb.Lock(); VertexGPU* p = curvb.pbuf+curvb.dwCount*3; SET_VERTEX(&p[0], 0, curvb); SET_VERTEX(&p[1], 1, curvb); SET_VERTEX(&p[2], 2, curvb); curvb.dwCount++; // add 1 to skip the first vertex if( gs.primIndex == gs.nTriFanVert ) gs.primIndex = (gs.primIndex+1)%ArraySize(gs.gsvertex); #ifdef PRIM_LOG 
OUTPUT_VERT(PRIM_LOG, p[0], 0); OUTPUT_VERT(PRIM_LOG, p[1], 1); OUTPUT_VERT(PRIM_LOG, p[2], 2); #endif } __forceinline void SetKickVertex(VertexGPU *p, Vertex v, int next, const VB& curvb) { SET_VERTEX(p, next, curvb); MOVZ(p, v.z, curvb); MOVFOG(p, v); } void ZeroGS::KickSprite() { assert( gs.primC >= 2 ); VB& curvb = vb[prim->ctxt]; if( curvb.bNeedTexCheck ) curvb.FlushTexData(); if( !(g_GameSettings&GAME_DOPARALLELCTX) && vb[!prim->ctxt].dwCount > 0 && vb[prim->ctxt].gsfb.fbp == vb[!prim->ctxt].gsfb.fbp ) { assert( vb[prim->ctxt].dwCount == 0 ); Flush(!prim->ctxt); } if (curvb.dwCount >= POINT_BUFFERFLUSH/3) Flush(prim->ctxt); curvb.Lock(); int next = (gs.primIndex+1)%ArraySize(gs.gsvertex); int last = (gs.primIndex+2)%ArraySize(gs.gsvertex); // sprite is too small and AA shows lines (tek4) if( s_AAx ) { gs.gsvertex[last].x += 4; if (s_AAy) gs.gsvertex[last].y += 4; } // might be bad sprite (KH dialog text) //if( gs.gsvertex[next].x == gs.gsvertex[last].x || gs.gsvertex[next].y == gs.gsvertex[last].y ) // return; VertexGPU* p = curvb.pbuf+curvb.dwCount*3; SetKickVertex(&p[0], gs.gsvertex[last], next, curvb); SetKickVertex(&p[3], gs.gsvertex[last], next, curvb); SetKickVertex(&p[1], gs.gsvertex[last], last, curvb); SetKickVertex(&p[4], gs.gsvertex[last], last, curvb); if (g_MaxRenderedHeight < p[0].y) g_MaxRenderedHeight = p[0].y; if (g_MaxRenderedHeight < p[1].y) g_MaxRenderedHeight = p[1].y; SetKickVertex(&p[2], gs.gsvertex[last], next, curvb); p[2].s = p[1].s; p[2].x = p[1].x; SetKickVertex(&p[5], gs.gsvertex[last], last, curvb); p[5].s = p[0].s; p[5].x = p[0].x; curvb.dwCount += 2; #ifdef PRIM_LOG OUTPUT_VERT(PRIM_LOG, p[0], 0); OUTPUT_VERT(PRIM_LOG, p[1], 1); #endif } void ZeroGS::KickDummy() { //GREG_LOG("Kicking bad primitive: %.8x\n", *(u32*)prim); } __forceinline void ZeroGS::RenderFBA(const VB& curvb) { // add fba to all pixels SETRS(D3DRS_STENCILWRITEMASK, STENCIL_CLEAR); SETRS(D3DRS_STENCILPASS, D3DSTENCILOP_REPLACE); SETRS(D3DRS_STENCILFAIL, 
D3DSTENCILOP_ZERO);
	SETRS(D3DRS_STENCILFUNC, D3DCMP_ALWAYS);
	SETRS(D3DRS_STENCILREF, STENCIL_FBA);

	// stencil-only pass: no depth test, no depth writes, no color writes
	SETRS(D3DRS_ZENABLE, FALSE);
	SETRS(D3DRS_ZWRITEENABLE, FALSE);
	SETRS(D3DRS_COLORWRITEENABLE, 0);

	// also keep the second render target (used while s_bWriteDepth is set) untouched
	if( s_bWriteDepth ) {
		if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, 0);
		else pd3dDevice->SetRenderTarget(1, NULL);
	}

	SETRS(D3DRS_STENCILPASS, D3DSTENCILOP_REPLACE);

	// only pixels whose shader alpha is >= 0xff reach the stencil write
	SETRS(D3DRS_ALPHATESTENABLE, TRUE);
	SETRS(D3DRS_ALPHAFUNC, D3DCMP_GREATEREQUAL);
	SETRS(D3DRS_ALPHAREF, 0xff);

	DXVEC4 v;
	v.x = 1; v.y = 2; v.z = 0; v.w = 0;
	SETCONSTF(GPU_ONECOLOR, v);

	DRAW();

	// put the alpha test back to what the context's TEST register asks for
	if( !curvb.test.ate )
		SETRS(D3DRS_ALPHATESTENABLE, FALSE);
	else {
		SETRS(D3DRS_ALPHAFUNC, g_dwAlphaCmp[curvb.test.atst]);
		SETRS(D3DRS_ALPHAREF, b2XAlphaTest ? min(255,2 * curvb.test.aref) : curvb.test.aref);
	}

	// reset (not necessary)
	SETRS(D3DRS_COLORWRITEENABLE, s_dwColorWrite);
	SETRS(D3DRS_STENCILFAIL, D3DSTENCILOP_KEEP);

	if( !curvb.zbuf.zmsk ) {
		SETRS(D3DRS_ZWRITEENABLE, TRUE);

		assert( curvb.pdepth != NULL );
		if( s_bWriteDepth ) {
			if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, 0xf);
			else pd3dDevice->SetRenderTarget(1, curvb.pdepth->psurf);
		}
	}
	SETRS(D3DRS_ZENABLE, curvb.test.zte);
}

// Stencil-only pre-pass for the current batch: with color and depth writes
// disabled it (optionally) clears the STENCIL_CLEAR bits and then writes
// STENCIL_SPECIAL for the pixels that survive the draw. Does nothing when
// g_bUpdateStencil is 0. Leaves the alpha test disabled on return.
__forceinline void ZeroGS::RenderAlphaTest(const VB& curvb)
{
	if( !g_bUpdateStencil ) return;

	if( curvb.test.ate ) {
		// afail == 1: the alpha test is turned off for the passes below
		if( curvb.test.afail == 1 ) SETRS(D3DRS_ALPHATESTENABLE, FALSE);
	}

	SETRS(D3DRS_ZWRITEENABLE, FALSE);
	SETRS(D3DRS_COLORWRITEENABLE, 0);

	DXVEC4 v;
	v.x = 1; v.y = 2; v.z = 0; v.w = 0;
	SETCONSTF(GPU_ONECOLOR, v);

	if( s_bWriteDepth ) {
		if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, 0);
		else pd3dDevice->SetRenderTarget(1,NULL);
	}

	// or a 1 to the stencil buffer wherever alpha passes
	SETRS(D3DRS_STENCILPASS, D3DSTENCILOP_REPLACE);
	SETRS(D3DRS_STENCILFUNC, D3DCMP_ALWAYS);
	SETRS(D3DRS_STENCILENABLE, TRUE);

	if( !s_bDestAlphaTest ) {
		// clear everything
		SETRS(D3DRS_STENCILREF, 0);
		SETRS(D3DRS_STENCILWRITEMASK, STENCIL_CLEAR);
		SETRS(D3DRS_ALPHATESTENABLE, FALSE);
		DRAW();

		// re-enable the alpha test for the marking pass if the context uses it
		if( curvb.test.ate && curvb.test.afail != 1 && USEALPHATESTING)
			SETRS(D3DRS_ALPHATESTENABLE, TRUE);
	}

	if( curvb.test.ate && curvb.test.atst>1 && curvb.test.aref > 0x80) {
		// use the (1,1,0,0) constant and the raw aref for references above 0x80
		v.x = 1; v.y = 1; v.z = 0; v.w = 0;
		SETCONSTF(GPU_ONECOLOR, v);
		SETRS(D3DRS_ALPHAREF, curvb.test.aref);
	}

	SETRS(D3DRS_STENCILREF, STENCIL_SPECIAL);
	SETRS(D3DRS_STENCILWRITEMASK, STENCIL_SPECIAL);
	SETRS(D3DRS_ZENABLE, FALSE);

	DRAW();

	// restore depth test / color writes; the alpha test stays off
	if( curvb.test.zte ) SETRS(D3DRS_ZENABLE, TRUE);
	SETRS(D3DRS_ALPHATESTENABLE, 0);
	SETRS(D3DRS_COLORWRITEENABLE, s_dwColorWrite);

	if( !curvb.zbuf.zmsk ) {
		SETRS(D3DRS_ZWRITEENABLE, TRUE);

		// set rt next level
		if( s_bWriteDepth ) {
			if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, 0xf);
			else pd3dDevice->SetRenderTarget(1, curvb.pdepth->psurf);
		}
	}
}

// Programs the stencil state for the upcoming draw and records it in the
// dwStencilMask / dwStencilRef variables: written pixels get
// STENCIL_PIXELWRITE, and the test compares the dest-alpha and/or special bits
// when curvb.test.date or dwUsingSpecialTesting request it.
__forceinline void ZeroGS::RenderStencil(const VB& curvb, DWORD dwUsingSpecialTesting)
{
	//NOTE: This stencil hack for dest alpha testing ONLY works when
	// the geometry in one DrawPrimitive call does not overlap

	// mark the stencil buffer for the new data's bits (mark 4 if alpha is >= 0xff)
	// mark 4 if a pixel was written (so that the stencil buf can be changed with new values)
	SETRS(D3DRS_STENCILWRITEMASK, STENCIL_PIXELWRITE);

	dwStencilMask = (curvb.test.date?STENCIL_ALPHABIT:0)|(dwUsingSpecialTesting?STENCIL_SPECIAL:0);
	SETRS(D3DRS_STENCILMASK, dwStencilMask);
	SETRS(D3DRS_STENCILPASS, D3DSTENCILOP_REPLACE);
	// with an empty mask there is nothing to compare, so every pixel passes
	SETRS(D3DRS_STENCILFUNC, dwStencilMask ? D3DCMP_EQUAL : D3DCMP_ALWAYS);

	dwStencilRef = curvb.test.date*curvb.test.datm|STENCIL_PIXELWRITE|(dwUsingSpecialTesting?STENCIL_SPECIAL:0);
	SETRS(D3DRS_STENCILREF, dwStencilRef);
}

// Post-pass after a batch was drawn (only valid when curvb.fba.fba is 0):
// re-draws the batch three times with color/depth writes disabled and the
// ppsOne pixel shader to update STENCIL_ALPHABIT for the written pixels and
// finally clear the STENCIL_CLEAR bits, then restores the render state.
__forceinline void ZeroGS::ProcessStencil(const VB& curvb)
{
	assert( !curvb.fba.fba );

	// set new alpha bit
	SETRS(D3DRS_STENCILWRITEMASK, STENCIL_ALPHABIT);
	SETRS(D3DRS_STENCILMASK, STENCIL_PIXELWRITE|STENCIL_FBA);
	SETRS(D3DRS_STENCILPASS, D3DSTENCILOP_REPLACE);
	SETRS(D3DRS_STENCILFUNC, D3DCMP_EQUAL);
	SETRS(D3DRS_STENCILREF, STENCIL_PIXELWRITE);

	SETRS(D3DRS_ZENABLE, FALSE);
	SETRS(D3DRS_ZWRITEENABLE, FALSE);
	SETRS(D3DRS_COLORWRITEENABLE, 0);

	if( s_bWriteDepth ) {
		if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, 0);
		else pd3dDevice->SetRenderTarget(1, NULL);
	}

	SETRS(D3DRS_ALPHATESTENABLE, 0);

	pd3dDevice->SetPixelShader(ppsOne);
	DRAW();

	// process when alpha >= 0xff
	SETRS(D3DRS_STENCILREF, STENCIL_PIXELWRITE|STENCIL_FBA|STENCIL_ALPHABIT);
	SETRS(D3DRS_STENCILFUNC, D3DCMP_EQUAL);
	DRAW();

	// clear STENCIL_PIXELWRITE bit
	SETRS(D3DRS_STENCILWRITEMASK, STENCIL_CLEAR);
	SETRS(D3DRS_STENCILPASS, D3DSTENCILOP_REPLACE);
	SETRS(D3DRS_STENCILFUNC, D3DCMP_ALWAYS);
	SETRS(D3DRS_STENCILREF, 0);
	DRAW();

	// restore state
	SETRS(D3DRS_COLORWRITEENABLE, s_dwColorWrite);
	if( curvb.test.ate && USEALPHATESTING)
		SETRS(D3DRS_ALPHATESTENABLE, TRUE);

	if( !curvb.zbuf.zmsk ) {
		SETRS(D3DRS_ZWRITEENABLE, TRUE);

		if( s_bWriteDepth ) {
			assert( curvb.pdepth != NULL );
			if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, 0xf);
			else pd3dDevice->SetRenderTarget(1, curvb.pdepth->psurf);
		}
	}

	SETRS(D3DRS_ZENABLE, curvb.test.zte);
	SETRS(D3DRS_STENCILPASS, D3DSTENCILOP_KEEP);
}

__forceinline void ZeroGS::ProcessFBA(const VB& curvb)
{
	// nothing to do when the top bit of the frame buffer mask is set
	if( (curvb.frame.fbm&0x80000000) ) return;

	// add fba to all pixels that were written and alpha was less than 0xff
	SETRS(D3DRS_STENCILWRITEMASK, STENCIL_ALPHABIT);
	SETRS(D3DRS_STENCILMASK, STENCIL_PIXELWRITE|STENCIL_FBA);
	SETRS(D3DRS_STENCILPASS, D3DSTENCILOP_REPLACE);
	SETRS(D3DRS_STENCILFUNC,
D3DCMP_EQUAL);
	SETRS(D3DRS_STENCILREF, STENCIL_FBA|STENCIL_PIXELWRITE|STENCIL_ALPHABIT);

	// no depth test/writes; only the alpha channel of the color target is written
	SETRS(D3DRS_ZENABLE, FALSE);
	SETRS(D3DRS_ZWRITEENABLE, FALSE);
	SETRS(D3DRS_COLORWRITEENABLE, D3DCOLORWRITEENABLE_ALPHA);

	if( s_bWriteDepth ) {
		if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, 0);
		else pd3dDevice->SetRenderTarget(1, NULL);
	}

	// processes the pixels with ALPHA < 0x80*2
	SETRS(D3DRS_ALPHATESTENABLE, TRUE);
	SETRS(D3DRS_ALPHAFUNC, D3DCMP_LESSEQUAL);
	SETRS(D3DRS_ALPHAREF, 0xff);

	// add 1 to dest
	SETRS(D3DRS_SRCBLENDALPHA, D3DBLEND_ONE);
	SETRS(D3DRS_DESTBLENDALPHA, D3DBLEND_ONE);
	SETRS(D3DRS_BLENDOPALPHA, D3DBLENDOP_ADD);

	// NOTE(review): f is a single float, while the other SETCONSTF calls visible
	// in this file pass a 4-component DXVEC4 - confirm that SETCONSTF does not
	// read three floats past f here.
	float f = 1;
	SETCONSTF(GPU_ONECOLOR, &f);
	pd3dDevice->SetPixelShader(ppsOne);

	DRAW();

	SETRS(D3DRS_ALPHATESTENABLE, FALSE);

	// reset bits
	SETRS(D3DRS_STENCILWRITEMASK, STENCIL_CLEAR);
	SETRS(D3DRS_STENCILMASK, STENCIL_PIXELWRITE|STENCIL_FBA);
	SETRS(D3DRS_STENCILPASS, D3DSTENCILOP_ZERO);
	SETRS(D3DRS_STENCILFAIL, D3DSTENCILOP_KEEP);
	SETRS(D3DRS_STENCILFUNC, D3DCMP_GREATER);
	SETRS(D3DRS_STENCILREF, 0);

	DRAW();

	// NOTE(review): this restore keys off test.atst, whereas the sibling
	// restore paths in this file key off test.ate - confirm this is intended.
	if( curvb.test.atst && USEALPHATESTING) {
		SETRS(D3DRS_ALPHATESTENABLE, TRUE);
		SETRS(D3DRS_ALPHAFUNC, g_dwAlphaCmp[curvb.test.atst]);
		SETRS(D3DRS_ALPHAREF, b2XAlphaTest ? min(255,2 * curvb.test.aref) : curvb.test.aref);
	}

	// restore (SetAlphaVariables)
	if( !bNeedAlphaColor ) SETRS(D3DRS_SRCBLENDALPHA, D3DBLEND_ONE);//(bNeedBlendFactorInAlpha ? D3DBLEND_ZERO : D3DBLEND_ONE));
	SETRS(D3DRS_DESTBLENDALPHA, D3DBLEND_ZERO);//bNeedBlendFactorInAlpha ? D3DBLEND_ONE : D3DBLEND_ZERO);
	if(bNeedAlphaColor && vAlphaBlendColor.y<0) SETRS(D3DRS_BLENDOPALPHA, D3DBLENDOP_REVSUBTRACT);

	// reset (not necessary)
	SETRS(D3DRS_COLORWRITEENABLE, s_dwColorWrite);

	if( !curvb.zbuf.zmsk ) {
		SETRS(D3DRS_ZWRITEENABLE, TRUE);

		if( s_bWriteDepth ) {
			if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, 0xf);
			else pd3dDevice->SetRenderTarget(1, curvb.pdepth->psurf);
		}
	}
	SETRS(D3DRS_ZENABLE, curvb.test.zte);
}

// Makes sure the given context has a render target and a depth target,
// updates them if they are flagged TS_NeedUpdate and binds them on the device.
// Sets vb[context].bVarsSetTarg when done.
inline void ZeroGS::SetContextTarget(int context)
{
	VB& curvb = vb[context];

	// look up (or create) the color target for the context's frame
	if( curvb.prndr == NULL )
		curvb.prndr = s_RTs.GetTarg(curvb.frame, 0, GET_MAXHEIGHT(curvb.gsfb.fbp, curvb.gsfb.fbw, curvb.gsfb.psm));

	// make sure targets are valid
	if( curvb.pdepth == NULL ) {
		// describe the z buffer with the same width/height as the color target
		// NOTE(review): curvb.prndr is dereferenced here before the NULL assert below
		frameInfo f;
		f.fbp = curvb.zbuf.zbp;
		f.fbw = curvb.frame.fbw;
		f.fbh = curvb.prndr->fbh;
		f.psm = curvb.zbuf.psm;
		f.fbm = 0;
		curvb.pdepth = (CDepthTarget*)s_DepthRTs.GetTarg(f, CRenderTargetMngr::TO_DepthBuffer|CRenderTargetMngr::TO_StrictHeight|
			(curvb.zbuf.zmsk?CRenderTargetMngr::TO_Virtual:0), GET_MAXHEIGHT(curvb.zbuf.zbp, curvb.gsfb.fbw, 0));
	}

	assert( curvb.prndr != NULL && curvb.pdepth != NULL );
	assert( curvb.pdepth->fbh == curvb.prndr->targheight );

//	if( curvb.pdepth->fbh != curvb.prndr->fbh ) {
//		// s_DepthRTs.DestroyTarg(curvb.pdepth);
//		ERROR_LOG("ZeroGS: render and depth heights different: %x %x\n", curvb.prndr->fbh, curvb.pdepth->fbh);
//		frameInfo f;
//		f.fbp = curvb.zbuf.zbp;
//		f.fbw = curvb.frame.fbw;
//		f.fbh = curvb.prndr->fbh;
//		f.psm = curvb.zbuf.psm;
//		f.fbm = 0;
//		curvb.pdepth = (CDepthTarget*)s_DepthRTs.GetTarg(f, CRenderTargetMngr::TO_DepthBuffer|CRenderTargetMngr::TO_StrictHeight|
//			(curvb.zbuf.zmsk?CRenderTargetMngr::TO_Virtual:0), GET_MAXHEIGHT(curvb.zbuf.zbp, curvb.gsfb.fbw, 0));
//	}

	if( curvb.pdepth->status & CRenderTarget::TS_Virtual) {

		if( !curvb.zbuf.zmsk ) {
			// depth writes are enabled, so the virtual target has to be promoted
			CRenderTarget* ptemp = s_DepthRTs.Promote(curvb.pdepth->fbp|(curvb.pdepth->fbw<<16));
			assert( ptemp == curvb.pdepth );
		}
		else
			curvb.pdepth->status &=
~CRenderTarget::TS_NeedUpdate;
	}

	// cleared when the depth update below already took care of binding
	BOOL bSetTarg = 1;

	if( curvb.pdepth->status & CRenderTarget::TS_NeedUpdate ) {
		assert( !(curvb.pdepth->status & CRenderTarget::TS_Virtual) );

		// don't update if virtual
		curvb.pdepth->Update(context, curvb.prndr);
		bSetTarg = 0;
	}

	if( curvb.prndr->status & CRenderTarget::TS_NeedUpdate ) {

		if( s_bWriteDepth ) {
			if( bSetTarg ) {
				pd3dDevice->SetRenderTarget(1, curvb.pdepth->psurf);
				pd3dDevice->SetDepthStencilSurface(curvb.pdepth->pdepth);
			}
		}
		else if( bSetTarg )
			pd3dDevice->SetDepthStencilSurface(curvb.pdepth->pdepth);

		curvb.prndr->Update(context, curvb.pdepth);
		// note, targ already set
	}
	else {

		//if( (vb[0].prndr != vb[1].prndr && vb[!context].bVarsSetTarg) || !vb[context].bVarsSetTarg )
		pd3dDevice->SetRenderTarget(0, curvb.prndr->psurf);
		//if( bSetTarg && ((vb[0].pdepth != vb[1].pdepth && vb[!context].bVarsSetTarg) || !vb[context].bVarsSetTarg) )
		curvb.pdepth->SetDepthTarget();

		// the new render target must not stay bound as a source texture
		if( s_ptexCurSet[0] == curvb.prndr->ptex ) {
			s_ptexCurSet[0] = NULL;
			pd3dDevice->SetTexture(SAMP_MEMORY0, NULL);
		}
		if( s_ptexCurSet[1] == curvb.prndr->ptex ) {
			s_ptexCurSet[1] = NULL;
			pd3dDevice->SetTexture(SAMP_MEMORY1, NULL);
		}

		curvb.prndr->SetViewport();
	}

	curvb.prndr->SetTarget(curvb.frame.fbp, curvb.scissor, context);

	// warn when z buffer and frame buffer sit at different offsets inside their targets
	if( (curvb.zbuf.zbp-curvb.pdepth->fbp) != (curvb.frame.fbp - curvb.prndr->fbp) && curvb.test.zte ) {
		WARN_LOG("frame and zbuf not aligned\n");
	}

	curvb.bVarsSetTarg = TRUE;
	// the other context has to rebind if it renders to a different target
	if( vb[!context].prndr != curvb.prndr ) vb[!context].bVarsSetTarg = FALSE;

	assert( !(curvb.prndr->status&CRenderTarget::TS_NeedUpdate) );
	assert( curvb.pdepth == NULL || !(curvb.pdepth->status&CRenderTarget::TS_NeedUpdate) );
}

// Uploads the texture-function (tfx/tcc/TEXA) and clamp/wrap shader constants
// for the given context when they are out of sync (bTexConstsSync is 0), and
// handles a pending s_bTexFlush. Returns immediately when texturing is
// disabled for the context's current primitive.
void ZeroGS::SetTexVariables(int context)
{
	if( !vb[context].curprim.tme ) {
		return;
	}

	assert( !vb[context].bNeedTexCheck );

	// NOTE(review): v, v2, fw and fh declared here are shadowed by the
	// declarations inside the clamp block below and are not read in this function.
	DXVEC4 v, v2;
	tex0Info& tex0 = vb[context].tex0;

	float fw = (float)tex0.tw;
	float fh = (float)tex0.th;

	if( !vb[context].bTexConstsSync ) {
		// alpha and texture highlighting
		DXVEC4 valpha, valpha2;

		// if clut, use the frame format
		int psm = tex0.psm;
		if( PSMT_ISCLUT(tex0.psm) ) psm = tex0.cpsm;

		// true for pixel formats 1, 2 and 10
		int nNeedAlpha = (psm == 1 || psm == 2 || psm == 10);

		DXVEC4 vblack;
		vblack.x = vblack.y = vblack.z = vblack.w = 10;

		// per texture function: z/w of valpha and all of valpha2
		switch(tex0.tfx) {
			case 0:
				valpha.z = 0; valpha.w = 0;
				valpha2.x = 0; valpha2.y = 0;
				valpha2.z = 2; valpha2.w = 1;
				break;
			case 1:
				valpha.z = 0; valpha.w = 1;
				valpha2.x = 1; valpha2.y = 0;
				valpha2.z = 0; valpha2.w = 0;
				break;
			case 2:
				valpha.z = 1; valpha.w = 1.0f;
				valpha2.x = 0; valpha2.y = tex0.tcc ? 1.0f : 0.0f;
				valpha2.z = 2; valpha2.w = 0;
				break;
			case 3:
				valpha.z = 1; valpha.w = tex0.tcc ? 0.0f : 1.0f;
				valpha2.x = 0; valpha2.y = tex0.tcc ? 1.0f : 0.0f;
				valpha2.z = 2; valpha2.w = 0;
				break;
			default: __assume(0);
		}

		if( tex0.tcc ) {

			if( tex0.tfx == 1 ) {
				//mode.x = 10;
				valpha.z = 0; valpha.w = 0;
				valpha2.x = 1; valpha2.y = 1;
				valpha2.z = 0; valpha2.w = 0;
			}

			if( nNeedAlpha ) {

				if( tex0.tfx == 0 ) {
					// make sure alpha is mult by two when the output is Cv = Ct*Cf
					valpha.x = 2*gs.texa.fta[0];
					// if 24bit, always choose ta[0]
					valpha.y = 2*gs.texa.fta[psm != 1];
					valpha.y -= valpha.x;
				}
				else {
					valpha.x = gs.texa.fta[0];
					// if 24bit, always choose ta[0]
					valpha.y = gs.texa.fta[psm != 1];
					valpha.y -= valpha.x;
				}

				// need black detection
				if( gs.texa.aem && psm == PSMCT24 )
					vblack.w = 0;
			}
			else {
				if( tex0.tfx == 0 ) {
					valpha.x = 0;
					valpha.y = 2;
				}
				else {
					valpha.x = 0;
					valpha.y = 1;
				}
			}
		}
		else {
			// reset alpha to color
			valpha.x = valpha.y = 0;
			valpha.w = 1;
		}

		SETCONSTF(GPU_TEXALPHA0+context, &valpha);
		SETCONSTF(GPU_TEXALPHA20+context, &valpha2);
		if( tex0.tcc && gs.texa.aem && (psm == PSMCT24 || psm == PSMCT16 || psm == PSMCT16S) )
			SETCONSTF(GPU_TESTBLACK0+context, &vblack);

		// clamp relies on texture width
		{
			clampInfo* pclamp = &ZeroGS::vb[context].clamp;
			// v: wrap-mode constants, v2: clamp extents
			// NOTE(review): v.z / v.w are only assigned in the "region rep"
			// cases (3) below but v is always uploaded.
			DXVEC4 v, v2;
			v.x = v.y = 0;
			LPD3DTEX* ptex = ZeroGS::vb[context].ptexClamp;
			ptex[0] = ptex[1] = NULL;

			float fw = ZeroGS::vb[context].tex0.tw;
			float fh = ZeroGS::vb[context].tex0.th;

			// horizontal wrap mode
			switch(pclamp->wms) {
				case 0:
					v2.x = -1e10; v2.z = 1e10;
					break;
				case 1: // pclamp
					// suikoden5 movie text
					v2.x = 0; v2.z = 1-0.5f/fw;
					break;
				case 2: // reg pclamp
					v2.x = (pclamp->minu+0.5f)/fw; v2.z = (pclamp->maxu-0.5f)/fw;
					break;

				case 3: // region rep x
					v.x = 0.9999f;
					v.z = fw / (float)GPU_TEXMASKWIDTH;
					v2.x = (float)GPU_TEXMASKWIDTH / fw;
					v2.z = pclamp->maxu / fw;

					// fetch a new bitwise texture only when minu changed
					if( pclamp->minu != g_PrevBitwiseTexX ) {
						g_PrevBitwiseTexX = pclamp->minu;
						ptex[0] = ZeroGS::s_BitwiseTextures.GetTex(pclamp->minu, NULL);
					}
					break;

				default: __assume(0);
			}

			// vertical wrap mode
			switch(pclamp->wmt) {
				case 0:
					v2.y = -1e10; v2.w = 1e10;
					break;
				case 1: // pclamp
					// suikoden5 movie text
					v2.y = 0; v2.w = 1-0.5f/fh;
					break;
				case 2: // reg pclamp
					v2.y = (pclamp->minv+0.5f)/fh; v2.w = (pclamp->maxv-0.5f)/fh;
					break;

				case 3: // region rep y
					v.y = 0.9999f;
					v.w = fh / (float)GPU_TEXMASKWIDTH;
					v2.y = (float)GPU_TEXMASKWIDTH / fh;
					v2.w = pclamp->maxv / fh;

					// fetch a new bitwise texture only when minv changed
					if( pclamp->minv != g_PrevBitwiseTexY ) {
						g_PrevBitwiseTexY = pclamp->minv;
						ptex[1] = ZeroGS::s_BitwiseTextures.GetTex(pclamp->minv, ptex[0]);
					}
					break;

				default: __assume(0);
			}

			SETCONSTF(GPU_TEXWRAPMODE0+context, v);
			SETCONSTF(GPU_CLAMPEXTS0+context, v2);
		}

		vb[context].bTexConstsSync = TRUE;
	}

	// a pending texture flush reloads the CLUT for paletted formats
	if(s_bTexFlush ) {
		if( PSMT_ISCLUT(tex0.psm) )
			texClutWrite(context);
		else
			s_bTexFlush = FALSE;
	}
}

// Uploads the shader constants that describe how tex0 is laid out inside the
// memory target pmemtarg, and records pmemtarg as the context's texture.
// When force is non-zero the texture is bound on the device immediately,
// otherwise it is only stored in s_ptexNextSet[context].
// Returns early (after logging) when m_Blocks has no bpp for tex0.psm.
void ZeroGS::SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, CMemoryTarget* pmemtarg, int force)
{
	DXVEC4 v;
	assert( pmemtarg != NULL );

	float fw = (float)tex0.tw;
	float fh = (float)tex0.th;

	// real texture dimensions and their inverses, only uploaded for bilinear sampling
	if( bilinear > 1 || (bilinear && conf.bilinear)) {
		v.x = (float)fw;
		v.y = (float)fh;
		v.z = 1.0f / (float)fw;
		v.w = 1.0f / (float)fh;
		SETCONSTF(GPU_REALTEXDIMS0+context, v);
	}

	if( m_Blocks[tex0.psm].bpp == 0 ) {
		ERROR_LOG("ZeroGS: Undefined tex psm 0x%x!\n", tex0.psm);
		return;
	}

	const BLOCK& b = m_Blocks[tex0.psm];

	float fbw = (float)tex0.tbw;

	v.x = b.vTexDims.x * fw;
	v.y = b.vTexDims.y * fh;
	v.z = (float)BLOCK_TEXWIDTH*(0.002f / 64.0f + 0.01f/128.0f);
	v.w = (float)BLOCK_TEXHEIGHT*0.2f/512.0f;

	if( bilinear > 1 || (conf.bilinear &&
bilinear) ) {
		v.x *= 1/128.0f;
		v.y *= 1/512.0f;
		v.z *= 1/128.0f;
		v.w *= 1/512.0f;
	}
	SETCONSTF(GPU_TEXDIMS0+context, v);

	// texture width (and its inverse) scaled by the memory target's width multiplier
	float g_fitexwidth = g_fiGPU_TEXWIDTH/(float)pmemtarg->widthmult;
	float g_texwidth = GPU_TEXWIDTH*(float)pmemtarg->widthmult;

	SETCONSTF(GPU_TEXBLOCK0+context, &b.vTexBlock.x);

	// fpage: position of tbp0 measured in texture rows; split into integer and fraction
	float fpage = tex0.tbp0*(64.0f*g_fitexwidth) + 0.05f * g_fitexwidth;
	float fpageint = floorf(fpage);
	// NOTE(review): starttbp is not read anywhere in the visible part of this function
	int starttbp = (int)fpage;

	// 2048 is number of words to span one page
	float fblockstride = (2048.0f /(float)(g_texwidth*BLOCK_TEXWIDTH)) * b.vTexDims.x * fbw;
	assert( fblockstride >= 1.0f );

	v.x = (float)(2048 * g_fitexwidth);
	v.y = fblockstride;
	v.z = g_fBlockMult/(float)pmemtarg->widthmult;
	v.w = fpage-fpageint;

	if( g_fBlockMult > 1 ) {
		// make sure to divide by mult (since the G16R16 texture loses info)
		v.z *= b.bpp * (1/32.0f);
	}

	SETCONSTF(GPU_TEXOFFSET0+context, v);

	// page offset relative to the first row (realy) held by the memory target
	v.y = (float)1.0f / (float)((pmemtarg->realheight+pmemtarg->widthmult-1)/pmemtarg->widthmult);
	v.x = (fpageint-(float)pmemtarg->realy/(float)pmemtarg->widthmult+0.5f)*v.y;

	SETCONSTF(GPU_PAGEOFFSET0+context, v);

	if( force ) {
		pd3dDevice->SetTexture(SAMP_MEMORY0+context, pmemtarg->ptex);
		s_ptexCurSet[context] = pmemtarg->ptex;
	}
	else
		s_ptexNextSet[context] = pmemtarg->ptex;

	vb[context].pmemtarg = pmemtarg;
	vb[context].bVarsTexSync = FALSE;
}

// Helper for SetAlphaVariables: prepares the blend-factor state for the alpha
// source selected by a.c (0, 1 or 2). Expects `a` (alphaInfo) and `usec` to be
// in scope at the expansion site and may overwrite usec. `sign` selects the
// positive (non-zero) or negative (zero) variant of the factor.
// The commented-out lines after the closing brace end in backslashes and are
// therefore still part of the macro's logical line; keep the empty line that
// follows them.
// assumes texture factor is unused
#define SET_ALPHA_COLOR_FACTOR(sign) { \
	switch(a.c) { \
		case 0: \
			bNeedAlphaColor = 1; \
			vAlphaBlendColor.y = (sign) ? 2.0f*255.0f/256.0f : -2.0f*255.0f/256.0f; \
			SETRS(D3DRS_SRCBLENDALPHA, D3DBLEND_ONE); \
			SETRS(D3DRS_BLENDOPALPHA, (sign) ? D3DBLENDOP_ADD : D3DBLENDOP_REVSUBTRACT); \
			break; \
		case 1: \
			/* if in 24 bit mode, dest alpha should be one */ \
			switch(vb[icurctx].prndr->psm&0xf) { \
				case 0: \
					bDestAlphaColor = (a.d!=2)&&((a.a==a.d)||(a.b==a.d)); \
					break; \
				case 1: \
					/* dest alpha should be one */ \
					bDestAlphaColor = 2; \
					break; \
				/* default: 16bit surface, so returned alpha is ok */ \
			} \
			break; \
		\
		case 2: \
			bNeedBlendFactorInAlpha = 1; /* should disable alpha channel writing */ \
			bNeedAlphaColor = 1; \
			vAlphaBlendColor.y = 0; \
			vAlphaBlendColor.w = (sign) ? (float)a.fix * (2.0f/255.0f) : (float)a.fix * (-2.0f/255.0f); \
			usec = 0; /* change so that alpha comes from source*/ \
			break; \
	} \
} \

//if( a.fix <= 0x80 ) { \
//				dwTemp = (a.fix*2)>255?255:(a.fix*2); \
//				dwTemp = dwTemp|(dwTemp<<8)|(dwTemp<<16)|0x80000000; \
//				DEBUG_LOG("bfactor: %8.8x\n", dwTemp); \
//				SETRS(D3DRS_BLENDFACTOR, dwTemp); \
//			} \
//			else { \

// Invalidates the cached alpha state so the next SetAlphaVariables call
// reprograms the blend state even for identical input.
void ZeroGS::ResetAlphaVariables()
{
	s_bAlphaSet = FALSE;
}

// Translates the GS alpha-blend selection in `a` (fields a, b, c, d, fix) into
// Direct3D blend render states. Does nothing beyond enabling blending when the
// same abcd/fix pair is already cached in s_alphaInfo and s_bAlphaSet is set.
// The branches below special-case which of the selectors equal 2 or equal each
// other (see the inline comments).
void ZeroGS::SetAlphaVariables(const alphaInfo& a)
{
	SETRS(D3DRS_ALPHABLENDENABLE, USEALPHABLENDING); // always set

	if( s_bAlphaSet && a.abcd == s_alphaInfo.abcd && a.fix == s_alphaInfo.fix ) {
		return;
	}

	// TODO: negative color when not clamping turns to positive???
	g_vars._bAlphaState = 0; // set all to zero
	bNeedBlendFactorInAlpha = 0;
	b2XAlphaTest = 1;
	// NOTE(review): dwTemp is only referenced by the commented-out tail of
	// SET_ALPHA_COLOR_FACTOR
	DWORD dwTemp = 0xffffffff;

	// default
	SETRS(D3DRS_SRCBLENDALPHA, D3DBLEND_ONE);
	SETRS(D3DRS_DESTBLENDALPHA, D3DBLEND_ZERO);
	SETRS(D3DRS_BLENDOPALPHA, D3DBLENDOP_ADD);

	s_alphaInfo = a;

	vAlphaBlendColor = DXVEC4(1,2*255.0f/256.0f,0,0);
	// index into blendalpha[] / blendinvalpha[]; the macro may reset it to 0
	DWORD usec = a.c;

	if( a.a == a.b ) { // just d remains
		SETRS(D3DRS_ALPHABLENDENABLE, USEALPHABLENDING);
		if( a.d == 0 ) {
			SETRS(D3DRS_ALPHABLENDENABLE, 0);
		}
		else {
			SETRS(D3DRS_DESTBLEND, a.d == 1 ? D3DBLEND_ONE : D3DBLEND_ZERO);
			SETRS(D3DRS_SRCBLEND, D3DBLEND_ZERO);
			SETRS(D3DRS_BLENDOP, D3DBLENDOP_ADD);
		}

		goto EndSetAlpha;
	}
	else if( a.d == 2 ) { // zero

		if( a.a == 2 ) {
			// zero all color
			SETRS(D3DRS_SRCBLEND, D3DBLEND_ZERO);
			SETRS(D3DRS_DESTBLEND, D3DBLEND_ZERO);
			goto EndSetAlpha;
		}
		else if( a.b == 2 ) {
			//b2XAlphaTest = 1;

			SET_ALPHA_COLOR_FACTOR(1);

			if( bDestAlphaColor == 2 ) {
				SETRS(D3DRS_BLENDOP, D3DBLENDOP_ADD);
				SETRS(D3DRS_SRCBLEND, a.a == 0 ? D3DBLEND_ONE : D3DBLEND_ZERO);
				SETRS(D3DRS_DESTBLEND, a.a == 0 ? D3DBLEND_ZERO : D3DBLEND_ONE);
			}
			else {
				if( bNeedAlphaColor ) bAlphaClamping = 2;

				SETRS(D3DRS_BLENDOP, D3DBLENDOP_ADD);
				SETRS(D3DRS_SRCBLEND, a.a == 0 ? blendalpha[usec] : D3DBLEND_ZERO);
				SETRS(D3DRS_DESTBLEND, a.a == 0 ? D3DBLEND_ZERO : blendalpha[usec]);
			}

			goto EndSetAlpha;
		}

		// nothing is zero, so must do some real blending
		//b2XAlphaTest = 1;
		bAlphaClamping = 3;

		SET_ALPHA_COLOR_FACTOR(1);

		SETRS(D3DRS_BLENDOP, a.a == 0 ? D3DBLENDOP_SUBTRACT : D3DBLENDOP_REVSUBTRACT);
		SETRS(D3DRS_SRCBLEND, bDestAlphaColor == 2 ? D3DBLEND_ONE : blendalpha[usec]);
		SETRS(D3DRS_DESTBLEND, bDestAlphaColor == 2 ? D3DBLEND_ONE : blendalpha[usec]);
	}
	else if( a.a == 2 ) { // zero

		//b2XAlphaTest = 1;
		bAlphaClamping = 1; // min testing

		SET_ALPHA_COLOR_FACTOR(1);

		if( a.b == a.d ) {
			// can get away with 1-A
			SETRS(D3DRS_BLENDOP, D3DBLENDOP_ADD);
			SETRS(D3DRS_SRCBLEND, (a.b == 0 && bDestAlphaColor != 2) ? blendinvalpha[usec] : D3DBLEND_ZERO);
			SETRS(D3DRS_DESTBLEND, (a.b == 0 || bDestAlphaColor == 2) ? D3DBLEND_ZERO : blendinvalpha[usec]);
		}
		else {
			SETRS(D3DRS_BLENDOP, a.b==0 ? D3DBLENDOP_REVSUBTRACT : D3DBLENDOP_SUBTRACT);
			SETRS(D3DRS_SRCBLEND, (a.b == 0 && bDestAlphaColor != 2) ? blendalpha[usec] : D3DBLEND_ONE);
			SETRS(D3DRS_DESTBLEND, (a.b == 0 || bDestAlphaColor == 2 ) ? D3DBLEND_ONE : blendalpha[usec]);
		}
	}
	else if( a.b == 2 ) {
		bAlphaClamping = 2; // max testing

		SET_ALPHA_COLOR_FACTOR(a.a!=a.d);

		if( a.a == a.d ) {
			// can get away with 1+A, but need to set alpha to negative
			SETRS(D3DRS_BLENDOP, D3DBLENDOP_ADD);

			if( bDestAlphaColor == 2 ) {
				assert(usec==1);

				// all ones
				bNeedAlphaColor = 1;
				bNeedBlendFactorInAlpha = 1;
				vAlphaBlendColor.y = 0;
				vAlphaBlendColor.w = -1;
				SETRS(D3DRS_SRCBLEND, a.a == 0 ? D3DBLEND_INVSRCALPHA : D3DBLEND_ZERO);
				SETRS(D3DRS_DESTBLEND, a.a == 0 ? D3DBLEND_ZERO : D3DBLEND_INVSRCALPHA);
			}
			else {
				SETRS(D3DRS_SRCBLEND, a.a == 0 ? blendinvalpha[usec] : D3DBLEND_ZERO);
				SETRS(D3DRS_DESTBLEND, a.a == 0 ? D3DBLEND_ZERO : blendinvalpha[usec]);
			}
		}
		else {
			//b2XAlphaTest = 1;
			SETRS(D3DRS_BLENDOP, D3DBLENDOP_ADD);
			SETRS(D3DRS_SRCBLEND, (a.a == 0 && bDestAlphaColor != 2) ? blendalpha[usec] : D3DBLEND_ONE);
			SETRS(D3DRS_DESTBLEND, (a.a == 0 || bDestAlphaColor == 2) ? D3DBLEND_ONE : blendalpha[usec]);
		}
	}
	else {
		// all 3 components are valid!
		bAlphaClamping = 3; // all testing
		SET_ALPHA_COLOR_FACTOR(a.a!=a.d);

		if( a.a == a.d ) {
			// can get away with 1+A, but need to set alpha to negative
			SETRS(D3DRS_BLENDOP, D3DBLENDOP_ADD);

			if( bDestAlphaColor == 2 ) {
				assert(usec==1);

				// all ones
				bNeedAlphaColor = 1;
				bNeedBlendFactorInAlpha = 1;
				vAlphaBlendColor.y = 0;
				vAlphaBlendColor.w = -1;
				SETRS(D3DRS_SRCBLEND, a.a == 0 ? D3DBLEND_INVSRCALPHA : D3DBLEND_SRCALPHA);
				SETRS(D3DRS_DESTBLEND, a.a == 0 ? D3DBLEND_SRCALPHA : D3DBLEND_INVSRCALPHA);
			}
			else {
				SETRS(D3DRS_SRCBLEND, a.a == 0 ? blendinvalpha[usec] : blendalpha[usec]);
				SETRS(D3DRS_DESTBLEND, a.a == 0 ? blendalpha[usec] : blendinvalpha[usec]);
			}
		}
		else {
			assert(a.b == a.d);
			SETRS(D3DRS_BLENDOP, D3DBLENDOP_ADD);

			if( bDestAlphaColor == 2 ) {
				assert(usec==1);

				// all ones
				bNeedAlphaColor = 1;
				bNeedBlendFactorInAlpha = 1;
				vAlphaBlendColor.y = 0;
				vAlphaBlendColor.w = 1;
				SETRS(D3DRS_SRCBLEND, a.a != 0 ? D3DBLEND_INVSRCALPHA : D3DBLEND_SRCALPHA);
				SETRS(D3DRS_DESTBLEND, a.a != 0 ?
D3DBLEND_SRCALPHA : D3DBLEND_INVSRCALPHA); } else { //b2XAlphaTest = 1; SETRS(D3DRS_SRCBLEND, a.a != 0 ? blendinvalpha[usec] : blendalpha[usec]); SETRS(D3DRS_DESTBLEND, a.a != 0 ? blendalpha[usec] : blendinvalpha[usec]); } } } EndSetAlpha: //b2XAlphaTest = b2XAlphaTest && bNeedAlphaColor && !bNeedBlendFactorInAlpha; INC_ALPHAVARS(); } void ZeroGS::SetWriteDepth() { if( conf.mrtdepth ) { s_bWriteDepth = TRUE; s_nWriteDepthCount = 4; } } BOOL ZeroGS::IsWriteDepth() { return s_bWriteDepth; } BOOL ZeroGS::IsWriteDestAlphaTest() { return s_bWriteDepth; } void ZeroGS::SetDestAlphaTest() { s_bDestAlphaTest = TRUE; s_nWriteDestAlphaTest = 4; } void ZeroGS::SetFogColor(u32 fog) { if( 1||gs.fogcol != fog ) { gs.fogcol = fog; ZeroGS::Flush(0); ZeroGS::Flush(1); if( !g_bIsLost ) { DXVEC4 v; // set it immediately v.x = (gs.fogcol&0xff)/255.0f; v.y = ((gs.fogcol>>8)&0xff)/255.0f; v.z = ((gs.fogcol>>16)&0xff)/255.0f; SETCONSTF(GPU_FOGCOLOR, v); } } } void ZeroGS::ExtWrite() { WARN_LOG("ExtWrite\n"); // use local DISPFB, EXTDATA, EXTBUF, and PMODE // int bpp, start, end; // tex0Info texframe; // bpp = 4; // if( texframe.psm == 0x12 ) bpp = 3; // else if( texframe.psm & 2 ) bpp = 2; // // // get the start and end addresses of the buffer // GetRectMemAddress(start, end, texframe.psm, 0, 0, texframe.tw, texframe.th, texframe.tbp0, texframe.tbw); } //////////// // Caches // //////////// #ifdef __x86_64__ extern "C" void TestClutChangeMMX(void* src, void* dst, int entries, void* pret); #endif bool ZeroGS::CheckChangeInClut(u32 highdword, u32 psm) { int cld = (highdword >> 29) & 0x7; int cbp = ((highdword >> 5) & 0x3fff); // processing the CLUT after tex0/2 are written switch(cld) { case 0: return false; case 1: break; // Seems to rarely not be 1. 
// note sure about changing cbp[0,1] case 4: return gs.cbp[0] != cbp; case 5: return gs.cbp[1] != cbp; // default: load default: break; } int cpsm = (highdword >> 19) & 0xe; int csm = (highdword >> 23) & 0x1; if( cpsm > 1 || csm ) // don't support 16bit for now return true; int csa = (highdword >> 24) & 0x1f; int entries = (psm&3)==3 ? 256 : 16; u64* src = (u64*)(g_pbyGSMemory + cbp*256); u64* dst = (u64*)(g_pbyGSClut+64*csa); bool bRet = false; #ifdef __x86_64__ TestClutChangeMMX(dst, src, entries, &bRet); #else int storeebx; // do a fast test with MMX __asm { mov storeebx, ebx mov edx, dst mov ecx, src mov ebx, entries Start: movq mm0, [edx] movq mm1, [edx+8] pcmpeqd mm0, [ecx] pcmpeqd mm1, [ecx+16] movq mm2, [edx+16] movq mm3, [edx+24] pcmpeqd mm2, [ecx+32] pcmpeqd mm3, [ecx+48] pand mm0, mm1 pand mm2, mm3 movq mm4, [edx+32] movq mm5, [edx+40] pcmpeqd mm4, [ecx+8] pcmpeqd mm5, [ecx+24] pand mm0, mm2 pand mm4, mm5 movq mm6, [edx+48] movq mm7, [edx+56] pcmpeqd mm6, [ecx+40] pcmpeqd mm7, [ecx+56] pand mm0, mm4 pand mm6, mm7 pand mm0, mm6 pmovmskb eax, mm0 cmp eax, 0xff je Continue mov bRet, 1 jmp Return Continue: cmp ebx, 16 jle Return test ebx, 0x10 jz AddEcx sub ecx, 448 // go back and down one column, AddEcx: add ecx, 256 // go to the right block cmp ebx, 0x90 jne Continue1 add ecx, 256 // skip whole block Continue1: add edx, 64 sub ebx, 16 jmp Start } Return: __asm { emms mov ebx, storeebx } #endif return bRet; } void ZeroGS::texClutWrite(int ctx) { s_bTexFlush = 0; if( g_bIsLost ) return; tex0Info& tex0 = vb[ctx].tex0; assert( PSMT_ISCLUT(tex0.psm) ); // processing the CLUT after tex0/2 are written switch(tex0.cld) { case 0: return; case 1: break; // tex0.cld is usually 1. 
case 2: gs.cbp[0] = tex0.cbp; break; case 3: gs.cbp[1] = tex0.cbp; break; // not sure about changing cbp[0,1] case 4: if( gs.cbp[0] == tex0.cbp ) return; gs.cbp[0] = tex0.cbp; break; case 5: if( gs.cbp[1] == tex0.cbp ) return; gs.cbp[1] = tex0.cbp; break; default: //DEBUG_LOG("cld isn't 0-5!"); break; } Flush(!ctx); int entries = (tex0.psm&3)==3 ? 256 : 16; if (tex0.csm) { switch (tex0.cpsm) { // 16bit psm // eggomania uses non16bit textures for csm2 case PSMCT16: { u16* src = (u16*)g_pbyGSMemory + tex0.cbp*128; u16 *dst = (u16*)(g_pbyGSClut+32*(tex0.csa&15)+(tex0.csa>=16?2:0)); for (int i = 0; i < entries; ++i) { *dst = src[getPixelAddress16_0(gs.clut.cou+i, gs.clut.cov, gs.clut.cbw)]; dst += 2; // check for wrapping if (((u32)dst & 0x3ff) == 0) dst = (u16*)(g_pbyGSClut+2); } break; } case PSMCT16S: { u16* src = (u16*)g_pbyGSMemory + tex0.cbp*128; u16 *dst = (u16*)(g_pbyGSClut+32*(tex0.csa&15)+(tex0.csa>=16?2:0)); for (int i = 0; i < entries; ++i) { *dst = src[getPixelAddress16S_0(gs.clut.cou+i, gs.clut.cov, gs.clut.cbw)]; dst += 2; // check for wrapping if (((u32)dst & 0x3ff) == 0) dst = (u16*)(g_pbyGSClut+2); } break; } case PSMCT32: case PSMCT24: { u32* src = (u32*)g_pbyGSMemory + tex0.cbp*64; u32 *dst = (u32*)(g_pbyGSClut+64*tex0.csa); // check if address exceeds src if( src+getPixelAddress32_0(gs.clut.cou+entries-1, gs.clut.cov, gs.clut.cbw) >= (u32*)g_pbyGSMemory + 0x00100000 ) ERROR_LOG("texClutWrite out of bounds\n"); else for(int i = 0; i < entries; ++i) { *dst = src[getPixelAddress32_0(gs.clut.cou+i, gs.clut.cov, gs.clut.cbw)]; dst++; } } break; default: { #ifndef RELEASE_TO_PUBLIC //DEBUG_LOG("unknown cpsm: %x (%x)\n", tex0.cpsm, tex0.psm); #endif break; } } } else { switch (tex0.cpsm) { case PSMCT24: case PSMCT32: if( entries == 16 ) WriteCLUT_T32_I4_CSM1((u32*)(g_pbyGSMemory + tex0.cbp*256), (u32*)(g_pbyGSClut+64*tex0.csa)); else WriteCLUT_T32_I8_CSM1((u32*)(g_pbyGSMemory + tex0.cbp*256), (u32*)(g_pbyGSClut+64*tex0.csa)); break; default: if( entries 
== 16 ) WriteCLUT_T16_I4_CSM1((u32*)(g_pbyGSMemory + 256 * tex0.cbp), (u32*)(g_pbyGSClut+32*(tex0.csa&15)+(tex0.csa>=16?2:0))); else // sse2 for 256 is more complicated, so use regular WriteCLUT_T16_I8_CSM1_c((u32*)(g_pbyGSMemory + 256 * tex0.cbp), (u32*)(g_pbyGSClut+32*(tex0.csa&15)+(tex0.csa>=16?2:0))); break; } } } void ZeroGS::SetTexFlush() { s_bTexFlush = TRUE; // if( PSMT_ISCLUT(vb[0].tex0.psm) ) // texClutWrite(0); // if( PSMT_ISCLUT(vb[1].tex0.psm) ) // texClutWrite(1); if( !s_bForceTexFlush ) { if( s_ptexCurSet[0] != s_ptexNextSet[0] ) { s_ptexCurSet[0] = s_ptexNextSet[0]; pd3dDevice->SetTexture(SAMP_MEMORY0, s_ptexNextSet[0]); } if( s_ptexCurSet[1] != s_ptexNextSet[1] ) { s_ptexCurSet[1] = s_ptexNextSet[1]; pd3dDevice->SetTexture(SAMP_MEMORY1, s_ptexNextSet[1]); } } } int ZeroGS::Save(char* pbydata) { if( pbydata == NULL ) return 40 + 0x00400000 + sizeof(gs) + 2*VBSAVELIMIT + 2*sizeof(frameInfo) + 4 + 256*4; s_RTs.ResolveAll(); s_DepthRTs.ResolveAll(); vb[0].Unlock(); vb[1].Unlock(); strcpy(pbydata, libraryName); *(u32*)(pbydata+16) = ZEROGS_SAVEVER; pbydata += 32; *(int*)pbydata = icurctx; pbydata += 4; *(int*)pbydata = VBSAVELIMIT; pbydata += 4; memcpy(pbydata, g_pbyGSMemory, 0x00400000); pbydata += 0x00400000; memcpy(pbydata, g_pbyGSClut, 256*4); pbydata += 256*4; *(int*)pbydata = sizeof(gs); pbydata += 4; memcpy(pbydata, &gs, sizeof(gs)); pbydata += sizeof(gs); for(int i = 0; i < 2; ++i) { memcpy(pbydata, &vb[i], VBSAVELIMIT); pbydata += VBSAVELIMIT; } vb[0].Lock(); vb[1].Lock(); return 0; } extern u32 s_uTex1Data[2][2], s_uClampData[2]; extern char *libraryName; bool ZeroGS::Load(char* pbydata) { memset(s_uTex1Data, 0, sizeof(s_uTex1Data)); memset(s_uClampData, 0, sizeof(s_uClampData)); // first 32 bytes are the id u32 savever = *(u32*)(pbydata+16); if( strncmp(pbydata, libraryName, 6) == 0 && (savever == ZEROGS_SAVEVER || savever == 0xaa000004) ) { g_MemTargs.Destroy(); GSStateReset(); pbydata += 32; int context = *(int*)pbydata; pbydata += 4; u32 
savelimit = VBSAVELIMIT; savelimit = *(u32*)pbydata; pbydata += 4; memcpy(g_pbyGSMemory, pbydata, 0x00400000); pbydata += 0x00400000; memcpy(g_pbyGSClut, pbydata, 256*4); pbydata += 256*4; memset(&gs, 0, sizeof(gs)); int savedgssize; if( savever == 0xaa000004 ) savedgssize = 0x1d0; else { savedgssize = *(int*)pbydata; pbydata += 4; } memcpy(&gs, pbydata, savedgssize); pbydata += savedgssize; prim = &gs._prim[gs.prac]; LPD3DVB pvb = vb[0].pvb; memcpy(&vb[0], pbydata, min(savelimit, VBSAVELIMIT)); pbydata += savelimit; vb[0].pvb = pvb; pvb = vb[1].pvb; memcpy(&vb[1], pbydata, min(savelimit, VBSAVELIMIT)); pbydata += savelimit; vb[1].pvb = pvb; for(int i = 0; i < 2; ++i) { vb[i].bNeedZCheck = vb[i].bNeedFrameCheck = 1; vb[i].bSyncVars = 0; vb[i].bNeedTexCheck = 1; memset(vb[i].uCurTex0Data, 0, sizeof(vb[i].uCurTex0Data)); } icurctx = -1; pd3dDevice->SetRenderTarget(0, psurfOrgTarg); pd3dDevice->SetRenderTarget(1, NULL); pd3dDevice->SetDepthStencilSurface(psurfOrgDepth); SetFogColor(gs.fogcol); vb[0].Lock(); vb[1].Lock(); return true; } return false; } void ZeroGS::SaveSnapshot(const char* filename) { g_bMakeSnapshot = 1; strSnapshot = filename; } // AVI capture stuff bool ZeroGS::StartCapture() { if( !s_aviinit ) { START_AVI("zerogs.avi"); assert( s_ptexAVICapture == NULL ); if( FAILED(pd3dDevice->CreateOffscreenPlainSurface(width, height, D3DFMT_A8R8G8B8, D3DPOOL_SYSTEMMEM, &s_ptexAVICapture, NULL)) ) { STOP_AVI(); ZeroGS::AddMessage("Failed to create avi"); return false; } s_aviinit = 1; } else { DEBUG_LOG("ZeroGS: Continuing from previous capture"); } s_avicapturing = 1; return true; } void ZeroGS::StopCapture() { s_avicapturing = 0; } void ZeroGS::CaptureFrame() { assert( s_avicapturing && s_aviinit && s_ptexAVICapture != NULL ); vector mem; pd3dDevice->GetRenderTargetData(psurfOrgTarg, s_ptexAVICapture); D3DLOCKED_RECT lock; mem.resize(width * height * 4); s_ptexAVICapture->LockRect(&lock, NULL, D3DLOCK_READONLY); assert( lock.Pitch == width*4 ); BYTE* pend = 
(BYTE*)lock.pBits + (conf.height-1)*width*4; for(int i = 0; i < conf.height; ++i) { memcpy_amd(&mem[width*4*i], pend - width*4*i, width * 4); } s_ptexAVICapture->UnlockRect(); int fps = SMODE1->CMOD == 3 ? 50 : 60; bool bSuccess = ADD_FRAME_FROM_DIB_TO_AVI("AAAA", fps, width, height, 32, &mem[0]); if( !bSuccess ) { s_avicapturing = 0; STOP_AVI(); s_aviinit = 0; ZeroGS::AddMessage("Failed to create avi"); return; } }