pcsx2/plugins/zerogs/dx/zerogs.cpp

5107 lines
142 KiB
C++

/* ZeroGS
* Copyright (C) 2005-2006 zerofrog@gmail.com
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#if defined(_WIN32) || defined(__WIN32__)
#include <d3dx9.h>
#include <dxerr9.h>
#include <aviUtil.h>
#endif
#include <stdio.h>
#include <malloc.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <tchar.h>
#include "GS.h"
#include "Mem.h"
#include "x86.h"
#include "Regs.h"
#include "zerogs.h"
#include "resource.h"
#include "zerogsshaders/zerogsshaders.h"
#include "targets.h"
// Compile-time configuration for this file.
// When DEBUG_PS2 is nonzero, ZeroGS::Create() requests a lockable back buffer, the reference
// rasterizer and software vertex processing.
#define DEBUG_PS2 0
// Number of VertexGPU entries mapped by each VB::Lock() call.
#define POINT_BUFFERFLUSH 512
// VB::Lock() wraps dwCurOff back to 0 once dwCurOff+POINT_BUFFERFLUSH would exceed this value.
#define POINT_BUFFERSIZE (1<<18)
// VB::CheckFrame() shifts scissor.y1 right by this amount to obtain a height.
#define MINMAX_SHIFT 3
#define MAX_ACTIVECLUTS 16
#define ZEROGS_SAVEVER 0xaa000005
// Stencil buffer bit assignments.
#define STENCIL_ALPHABIT 1 // if set, dest alpha >= 0x80
#define STENCIL_PIXELWRITE 2 // if set, pixel just written (reset after every Flush)
#define STENCIL_FBA 4 // if set, just written pixel's alpha >= 0 (reset after every Flush)
#define STENCIL_SPECIAL 8 // if set, indicates that pixel passed its alpha test (reset after every Flush)
//#define STENCIL_PBE 16
// NOTE(review): this mask still contains bit 16 although STENCIL_PBE is commented out above.
#define STENCIL_CLEAR (2|4|8|16)
// Byte offset of the nNextFrameHeight member inside a VB.
#define VBSAVELIMIT ((u32)((u8*)&vb[0].nNextFrameHeight-(u8*)&vb[0]))
using namespace ZeroGS;
// Direct3D interface and device; both are created in ZeroGS::Create() and released in ZeroGS::Destroy().
static LPDIRECT3D9 pD3D = NULL; // Used to create the D3DDevice
LPDIRECT3DDEVICE9 pd3dDevice = NULL;
static DXVEC4 s_vznorm;
// Counters and plugin information defined in other translation units.
extern u32 g_nGenVars, g_nTexVars, g_nAlphaVars, g_nResolve;
extern char *libraryName;
extern int g_nFrame, g_nRealFrame;
extern float fFPS;
extern unsigned char revision, build, minor;
BOOL g_bDisplayMsg = 1;
extern HINSTANCE hInst;
BOOL g_bSaveFlushedFrame = 1;
BOOL g_bIsLost = 0;
int g_nFrameRender = 10;
int g_nFramesSkipped = 0;
int g_MaxRenderedHeight = 0;
// In public release builds the statistics macros expand to nothing and the debug toggles
// become compile-time constants; otherwise they are real counters and variables.
#ifdef RELEASE_TO_PUBLIC
#define INC_GENVARS()
#define INC_TEXVARS()
#define INC_ALPHAVARS()
#define INC_RESOLVE()
#define g_bUpdateEffect 0
#define g_bWriteProfile 0
#define g_bSaveTex 0
#define g_bSaveTrans 0
#define g_bSaveFrame 0
#define g_bSaveFinalFrame 0
#define g_bUpdateStencil 1
#define g_bSaveResolved 0
#else
#define INC_GENVARS() ++g_nGenVars
#define INC_TEXVARS() ++g_nTexVars
#define INC_ALPHAVARS() ++g_nAlphaVars
#define INC_RESOLVE() ++g_nResolve
BOOL g_bSaveTrans = 0;
BOOL g_bUpdateEffect = 0;
BOOL g_bWriteProfile = 0;
BOOL g_bSaveTex = 0; // saves the current texture
BOOL g_bSaveFrame = 0; // saves the current psurfTarget
BOOL g_bSaveFinalFrame = 0; // saves the input to the CRTC
BOOL g_bUpdateStencil = 1; // only needed for dest alpha test (unfortunately, it has to be on all the time)
BOOL g_bSaveResolved = 0;
#endif
// Draws dwCount primitives of the current context; expects a variable named curvb to be in scope
// at the point of use and picks the D3D primitive type from primtype[].
#define DRAW() pd3dDevice->DrawPrimitive(primtype[curvb.curprim.prim], 0, curvb.dwCount)
extern int s_frameskipping;
// Disabled debugging wrapper that logged duplicate render target bindings.
//inline void SetRenderTarget_(int index, LPD3DSURF psurf, int counter, const char* pname)
//{
// static LPD3DSURF ptargs[4] = {NULL};
// static int counters[4] = {0};
// static const char* pnames[4] = {NULL};
//
// if( ptargs[index] == psurf && psurf != NULL )
// DEBUG_LOG("duplicate targets\n");
// pd3dDevice->SetRenderTarget(index, psurf);
// ptargs[index] = psurf;
// counters[index] = counter;
// pnames[index] = pname;
//}
//
//#define SetRenderTarget(index, psurf) SetRenderTarget_(index, psurf, __COUNTER__, __FUNCTION__)
static u32 g_SaveFrameNum = 0;
BOOL g_bMakeSnapshot = 0;
string strSnapshot;
// Defaults only; ZeroGS::Create() overwrites these four values from the device caps.
int GPU_TEXWIDTH = 512;
float g_fiGPU_TEXWIDTH = 1/512.0f;
int g_MaxTexWidth = 4096, g_MaxTexHeight = 4096;
// AVI Capture
static int s_aviinit = 0;
static int s_avicapturing = 0;
static LPD3DSURF s_ptexAVICapture = NULL; // system memory texture
// Per-primitive-type tables with eight entries each.
// NOTE(review): presumably vertices-per-primitive and vertices consumed per kick; confirm against the users.
const u32 g_primmult[8] = { 1, 2, 2, 3, 3, 3, 2, 0xff };
const u32 g_primsub[8] = { 1, 2, 1, 3, 1, 1, 2, 0 };
// Returns the raw bit pattern of a float as a DWORD (for render states that take float values
// through a DWORD parameter).
// The bits are copied with memcpy instead of dereferencing a casted pointer, which avoids the
// strict-aliasing violation of reading a float object through a DWORD lvalue.
inline DWORD FtoDW(float f)
{
    DWORD dwBits = 0;
    memcpy(&dwBits, &f, sizeof(dwBits) < sizeof(f) ? sizeof(dwBits) : sizeof(f));
    return dwBits;
}
float g_fBlockMult = 1;
// Mirrors the fullscreen bit of conf.options at the time of the last ZeroGS::Create().
static int s_nFullscreen = 0;
int g_nDepthUpdateCount = 0;
int g_nDepthBias = 0;
// Consts
// Maps a prim index (see DRAW() and VB::CheckPrim()) to the D3D primitive type used to draw it.
// Index 7 only holds the D3DPT_FORCE_DWORD placeholder.
static const D3DPRIMITIVETYPE primtype[8] = { D3DPT_POINTLIST, D3DPT_LINELIST, D3DPT_LINELIST, D3DPT_TRIANGLELIST, D3DPT_TRIANGLELIST, D3DPT_TRIANGLELIST, D3DPT_TRIANGLELIST, D3DPT_FORCE_DWORD };
// Blend factor selectors and, at the same index, their inverted counterparts.
static const DWORD blendalpha[3] = { D3DBLEND_SRCALPHA, D3DBLEND_DESTALPHA, D3DBLEND_BLENDFACTOR };
static const DWORD blendinvalpha[3] = { D3DBLEND_INVSRCALPHA, D3DBLEND_INVDESTALPHA, D3DBLEND_INVBLENDFACTOR };
static const int PRIMMASK = 0x0e; // for now ignore 0x10 (AA)
// Alpha test comparison functions, eight entries.
static const DWORD g_dwAlphaCmp[] = { D3DCMP_NEVER, D3DCMP_ALWAYS, D3DCMP_LESS, D3DCMP_LESSEQUAL,
D3DCMP_EQUAL, D3DCMP_GREATEREQUAL, D3DCMP_GREATER, D3DCMP_NOTEQUAL };
// used for afail case
// Each entry is the logical negation of the g_dwAlphaCmp entry at the same index.
static const DWORD g_dwReverseAlphaCmp[] = { D3DCMP_ALWAYS, D3DCMP_NEVER, D3DCMP_GREATEREQUAL, D3DCMP_GREATER,
D3DCMP_NOTEQUAL, D3DCMP_LESS, D3DCMP_LESSEQUAL, D3DCMP_EQUAL };
// Depth test comparison functions, four entries.
static const DWORD g_dwZCmp[] = { D3DCMP_NEVER, D3DCMP_ALWAYS, D3DCMP_GREATEREQUAL, D3DCMP_GREATER };
/////////////////////
// graphics resources
// The vertex declaration, vertex shaders and pixel shaders below are released in ZeroGS::Destroy().
static LPD3DDECL pdecl = NULL;
static LPD3DVS pvs[16] = {NULL};
static LPD3DPS ppsRegular[4] = {NULL}, ppsTexture[NUM_SHADERS] = {NULL};
static LPD3DPS ppsCRTC[2] = {NULL}, ppsCRTC24[2] = {NULL}, ppsCRTCTarg[2] = {NULL};
// Selected again in ZeroGS::Create() from the device caps.
int g_nPixelShaderVer = SHADER_30; // default
// In RELEASE_TO_PUBLIC builds this points at the locked IDR_SHADERS resource (see ZeroGS::Create()).
static BYTE* s_lpShaderResources = NULL;
static map<int, SHADERHEADER*> mapShaderResources;
LPD3DTEX s_ptexCurSet[2] = {NULL};
#define s_bForceTexFlush 1
static LPD3DTEX s_ptexNextSet[2] = {NULL};
static ID3DXFont* pFont = NULL;
static ID3DXSprite* pSprite = NULL;
// Back buffer and depth surface fetched from the device right after it is created.
static LPD3DSURF psurfOrgTarg = NULL, psurfOrgDepth = NULL;
LPD3DTEX ptexBlocks = NULL, ptexConv16to32 = NULL; // holds information on block tiling
LPD3DTEX ptexBilinearBlocks = NULL;
IDirect3DVolumeTexture9* ptexConv32to16 = NULL;
// Both are maintained by CreateInterlaceTex().
static LPD3DTEX s_ptexInterlace = NULL; // holds interlace fields
static int s_nInterlaceTexWidth = 0; // width of texture
static list<LPD3DTEX> s_vecTempTextures; // temporary textures, released at the end of every frame
static BOOL s_bTexFlush = FALSE;
static LPD3DTEX ptexLogo = NULL;
static BOOL s_bWriteDepth = FALSE;
static BOOL s_bDestAlphaTest = FALSE;
static int s_nLastResolveReset = 0;
static int s_nResolveCounts[30] = {0}; // resolve counts for last 30 frames
static int s_nCurResolveIndex = 0;
// VB::CheckFrame() temporarily sets bit 0x100 in this value and masks it back to 0xff before returning.
int s_nResolved = 0; // number of targets resolved this frame
int g_nDepthUsed = 0; // ffx2 pal movies
static int s_nWriteDepthCount = 0;
static int s_nWireframeCount = 0;
static int s_nWriteDestAlphaTest = 0;
////////////////////
// State parameters
// Reciprocals of the back buffer width and height, set in ZeroGS::Create().
static float fiRendWidth, fiRendHeight;
static DWORD dwStencilRef, dwStencilMask;
static DXVEC4 vAlphaBlendColor; // used for GPU_COLOR
static BYTE bNeedBlendFactorInAlpha; // set if the output source alpha is different from the real source alpha (only when blend factor > 0x80)
static DWORD s_dwColorWrite = 0xf; // the color write mask of the current target
BOOL g_bDisplayFPS = FALSE;
// Four byte-sized alpha flags overlaid with a single u32 (_bAlphaState);
// presumably so that all of them can be read or reset with one access.
union {
struct {
BYTE _bNeedAlphaColor; // set if vAlphaBlendColor needs to be set
BYTE _b2XAlphaTest; // Only valid when bNeedAlphaColor is set. if 1st bit set, double all alpha testing values
// otherwise alpha testing needs to be done separately.
BYTE _bDestAlphaColor; // set to 1 if blending with dest color (process only one tri at a time). If 2, dest alpha is always 1.
BYTE _bAlphaClamping; // if first bit is set, do min; if second bit, do max
};
u32 _bAlphaState;
} g_vars;
// Short aliases for the individual flags of g_vars.
#define bNeedAlphaColor g_vars._bNeedAlphaColor
#define b2XAlphaTest g_vars._b2XAlphaTest
#define bDestAlphaColor g_vars._bDestAlphaColor
#define bAlphaClamping g_vars._bAlphaClamping
int g_PrevBitwiseTexX = -1, g_PrevBitwiseTexY = -1; // textures stored in SAMP_BITWISEANDX and SAMP_BITWISEANDY
// stores the buffers for the last RenderCRTC
const float g_filog32 = 0.999f / (32.0f * logf(2.0f));
// Cleared by ZeroGS::GSStateReset().
static BOOL s_bAlphaSet = FALSE;
static alphaInfo s_alphaInfo;
namespace ZeroGS
{
VB vb[2];
float fiTexWidth[2], fiTexHeight[2]; // current tex width and height
LONG width, height;
u8* g_pbyGSMemory = NULL; // 4Mb GS system mem
u8* g_pbyGSClut = NULL;
D3DPRESENT_PARAMETERS d3dpp;
BYTE s_AAx = 0, s_AAy = 0; // if AAy is set, then AAx has to be set
BYTE bIndepWriteMasks = 1;
BOOL s_bBeginScene = FALSE;
D3DFORMAT g_RenderFormat = D3DFMT_A16B16G16R16F;
int icurctx = -1;
LPD3DVB pvbRect = NULL;
DXVEC4 g_vdepth = DXVEC4(65536.0f, 256.0f, 1.0f, 65536.0f*256.0f);
LPD3DVS pvsBitBlt = NULL, pvsBitBlt30 = NULL;
LPD3DPS ppsBitBlt[2] = {NULL}, ppsBitBltDepth[2] = {NULL}, ppsBitBltDepthTex[2] = {NULL}, ppsOne = NULL;
LPD3DPS ppsBaseTexture = NULL, ppsConvert16to32 = NULL, ppsConvert32to16 = NULL;
extern CRangeManager s_RangeMngr; // manages overwritten memory
void FlushTransferRanges(const tex0Info* ptex);
// returns the first and last addresses aligned to a page that cover
void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h, int bp, int bw);
HRESULT LoadEffects();
HRESULT LoadExtraEffects();
LPD3DPS LoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context);
static int s_nNewWidth = -1, s_nNewHeight = -1;
void ChangeDeviceSize(int nNewWidth, int nNewHeight);
void ProcessMessages();
void RenderCustom(float fAlpha); // intro anim
struct MESSAGE
{
MESSAGE() {}
MESSAGE(const char* p, DWORD dw) { strcpy(str, p); dwTimeStamp = dw; }
char str[255];
DWORD dwTimeStamp;
};
static list<MESSAGE> listMsgs;
///////////////////////
// Method Prototypes //
///////////////////////
void AdjustTransToAspect(DXVEC4& v, int dispwidth, int dispheight);
void KickPoint();
void KickLine();
void KickTriangle();
void KickTriangleFan();
void KickSprite();
void KickDummy();
inline void SetContextTarget(int context);
// use to update the d3d state
void SetTexVariables(int context);
void SetAlphaVariables(const alphaInfo& ainfo);
void ResetAlphaVariables();
__forceinline void SetAlphaTestInt(pixTest curtest);
__forceinline void RenderAlphaTest(const VB& curvb);
__forceinline void RenderStencil(const VB& curvb, DWORD dwUsingSpecialTesting);
__forceinline void ProcessStencil(const VB& curvb);
__forceinline void RenderFBA(const VB& curvb);
__forceinline void ProcessFBA(const VB& curvb);
void ResolveInRange(int start, int end);
void ExtWrite();
inline LPD3DTEX CreateInterlaceTex(int width) {
if( width == s_nInterlaceTexWidth && s_ptexInterlace != NULL ) return s_ptexInterlace;
SAFE_RELEASE(s_ptexInterlace);
s_nInterlaceTexWidth = width;
HRESULT hr;
V(pd3dDevice->CreateTexture(width, 1, 1, 0, D3DFMT_A8R8G8B8, D3DPOOL_MANAGED, &s_ptexInterlace, NULL));
D3DLOCKED_RECT lock;
s_ptexInterlace->LockRect(0, &lock, NULL, 0);
for(int i = 0; i < width; ++i) ((u32*)lock.pBits)[i] = (i&1) ? 0xffffffff : 0;
s_ptexInterlace->UnlockRect(0);
return s_ptexInterlace;
}
DrawFn drawfn[8] = { KickDummy, KickDummy, KickDummy, KickDummy,
KickDummy, KickDummy, KickDummy, KickDummy };
}; // end namespace
///////////////////
// Context State //
///////////////////
// Starts from an all-zero context and then gives tex0 a 1x1 size.
ZeroGS::VB::VB()
{
    memset(this, 0, sizeof(*this));
    tex0.th = 1;
    tex0.tw = 1;
}
// Tears the context down by delegating to Destroy().
ZeroGS::VB::~VB()
{
    this->Destroy();
}
// Unlocks and releases the vertex buffer and forgets the current render and depth targets.
// The targets themselves are only unlinked here, not released.
void ZeroGS::VB::Destroy()
{
    Unlock();
    SAFE_RELEASE(pvb);

    pdepth = NULL;
    prndr = NULL;
}
// Maps the next POINT_BUFFERFLUSH vertices of the vertex buffer into pbuf and resets dwCount.
// Does nothing when the buffer is already mapped (pbuf != NULL).
void ZeroGS::VB::Lock()
{
    assert(pvb != NULL);

    // already mapped, keep the current window
    if( pbuf != NULL )
        return;

    // start over at the beginning once the next window would not fit anymore
    if( dwCurOff+POINT_BUFFERFLUSH > POINT_BUFFERSIZE )
        dwCurOff = 0;

    // a window at offset 0 discards the old contents, any later window appends without overwrite
    if( dwCurOff ) {
        pvb->Lock(dwCurOff*sizeof(VertexGPU), sizeof(VertexGPU)*POINT_BUFFERFLUSH, (void**)&pbuf, D3DLOCK_NOOVERWRITE|D3DLOCK_NOSYSLOCK);
    }
    else {
        pvb->Lock(dwCurOff*sizeof(VertexGPU), sizeof(VertexGPU)*POINT_BUFFERFLUSH, (void**)&pbuf, D3DLOCK_DISCARD|D3DLOCK_NOSYSLOCK);
    }

    dwCount = 0;
    assert( pbuf != NULL );
}
// Makes sure the vertex buffer is locked, then reports whether vertices are pending (dwCount > 0)
// while the requested primitive differs from the current one, either in the PRIMMASK bits of
// _val or in the D3D primitive type it maps to.
bool ZeroGS::VB::CheckPrim()
{
    Lock();

    const bool bPrimDiffers =
        (PRIMMASK & prim->_val) != (PRIMMASK & curprim._val) ||
        primtype[prim->prim] != primtype[curprim.prim];

    return bPrimDiffers && dwCount > 0;
}
// upper bound on max possible height
// Computed as (0x00100000 - 64*fbp) / fbw, rounded down to a multiple of 32 and doubled when
// bit 1 of psm is set.
#define GET_MAXHEIGHT(fbp, fbw, psm) ((((0x00100000-64*(fbp))/(fbw))&~0x1f)<<((psm&2)?1:0))
#include <set>
// Height cap shared by all contexts; VB::CheckFrame() can only lower it (neopets hack).
static int maxmin = 608;
//static set<int> s_setFBP[2]; // previous frame/zbuf pointers for the last 2 frames
//static int s_nCurFBPSet = 0;
//static map<int, int> s_mapFrameHeights[2];
//static int s_nCurFrameMap = 0;
// a lot of times, target is too big and overwrites the texture using, if tbp != 0, use it to bound
// Re-validates the frame buffer (gsfb) and z buffer (zbuf) settings of this context.
// When bNeedFrameCheck is set, a height for the frame is derived from the surrounding memory
// layout and a number of game specific hacks, and both the render target and the depth target are
// fetched again from s_RTs / s_DepthRTs. When only bNeedZCheck is set, just the depth target is
// fetched again. If a render target is bound on exit, SetContextTarget(ictx) is called.
// tbp: base pointer of the texture in use; used to bound the target height so that the target
// does not run into the texture.
void ZeroGS::VB::CheckFrame(int tbp)
{
// NOTE(review): bChanged is only written inside this function, never read.
static int bChanged;
if( bNeedZCheck ) {
PRIM_LOG("zbuf_%d: zbp=0x%x psm=0x%x, zmsk=%d\n", ictx, zbuf.zbp, zbuf.psm, zbuf.zmsk);
//zbuf = *zb;
}
// invalid bpp
if( m_Blocks[gsfb.psm].bpp == 0 ) {
ERROR_LOG("CheckFrame invalid bpp %d\n", gsfb.psm);
return;
}
bChanged = 0;
// nothing can be set up without a frame buffer width
if( gsfb.fbw <= 0 ) {
return;
}
if( bNeedFrameCheck ) {
// maxpos starts as an amount of memory available to the target and is turned into a
// height in lines further down (division by fbw)
int maxpos = 0x00100000;
// important to set before calling GetTarg
bNeedFrameCheck = 0;
bNeedZCheck = 0;
// add constraints of other targets
if( gsfb.fbw > 0 ) {
maxpos = 0x00100000-64*gsfb.fbp;
// make sure texture is far away from tbp
if( gsfb.fbp < tbp && gsfb.fbp + 0x2000 < tbp) {
maxpos = min(64*(tbp-gsfb.fbp), maxpos);
}
if( prndr != NULL ) {
// offroad uses 0x80 fbp which messes up targets
if( gsfb.fbp + 0x80 < frame.fbp ) {
// special case when double buffering (hamsterball)
maxpos = min(64*(frame.fbp-gsfb.fbp), maxpos);
}
}
if( zbuf.zbp < tbp && !zbuf.zmsk ) {
maxpos = min((tbp-zbuf.zbp)*((zbuf.psm&2)?128:64), maxpos);
}
// old caching method
if( gsfb.fbp < zbuf.zbp && !zbuf.zmsk ) { // zmsk necessary for KH movie
int temp = 64*(zbuf.zbp-gsfb.fbp);//min( (0x00100000-64*zbuf.zbp) , 64*(zbuf.zbp-gsfb.fbp) );
maxpos = min(temp, maxpos);
}
maxpos /= gsfb.fbw;
if( gsfb.psm & 2 )
maxpos *= 2;
maxpos = min(gsfb.fbh, maxpos);
maxpos = min(maxmin, maxpos);
//? atelier iris crashes without it
if( maxpos > 256 )
maxpos &= ~0x1f;
}
else {
// NOTE(review): unreachable as written, gsfb.fbw <= 0 already returned above
ERROR_LOG("render target null, ignoring\n");
//prndr = NULL;
//pdepth = NULL;
return;
}
gsfb.psm &= 0xf; // shadow tower
if( prndr != NULL ) {
// render target
if( prndr->psm != gsfb.psm ) {
// behavior for dest alpha varies
ResetAlphaVariables();
}
}
// height implied by the scissor rectangle, made even when larger than 2
int fbh = (scissor.y1>>MINMAX_SHIFT)+1;
if( fbh > 2 && (fbh&1) ) fbh -= 1;
if( !(gsfb.psm&2) || !(g_GameSettings&GAME_FULL16BITRES) ) {
fbh = min(fbh, maxpos);
}
frame = gsfb;
if (frame.fbw > 1024) frame.fbw = 1024;
// if( fbh > 256 && (fbh % m_Blocks[gsfb.psm].height) <= 2 ) {
// // dragon ball z
// fbh -= fbh%m_Blocks[gsfb.psm].height;
// }
if( !(frame.psm&2) || !(g_GameSettings&GAME_FULL16BITRES) )
frame.fbh = fbh;
if( !(frame.psm&2) ) {//|| !(g_GameSettings&GAME_FULL16BITRES) ) {
if( frame.fbh >= 512 ) {
// neopets hack
maxmin = min(maxmin, frame.fbh);
frame.fbh = maxmin;
}
}
// mgs3 hack to get proper resolution, targets after 0x2000 are usually feedback
if( g_MaxRenderedHeight >= 0xe0 && frame.fbp >= 0x2000 ) {
int considerheight = (g_MaxRenderedHeight/8+31)&~31;
if( frame.fbh > considerheight )
frame.fbh = considerheight;
else if( frame.fbh <= 32 )
frame.fbh = considerheight;
if( frame.fbh == considerheight ) {
// stops bad resolves (mgs3)
// bit 0x100 is cleared again at the end of this function
if( !curprim.abe && (!test.ate || test.atst == 0) )
s_nResolved |= 0x100;
}
}
// ffxii hack to stop resolving
if( !(frame.psm&2) || !(g_GameSettings&GAME_FULL16BITRES) ) {
if( frame.fbp >= 0x3000 && fbh >= 0x1a0 ) {
int endfbp = frame.fbp + frame.fbw*fbh/((gsfb.psm&2)?128:64);
// see if there is a previous render target in the way, reduce
for(CRenderTargetMngr::MAPTARGETS::iterator itnew = s_RTs.mapTargets.begin(); itnew != s_RTs.mapTargets.end(); ++itnew) {
if( itnew->second->fbp > frame.fbp && endfbp > itnew->second->fbp ) {
endfbp = itnew->second->fbp;
}
}
frame.fbh = (endfbp-frame.fbp)*((gsfb.psm&2)?128:64)/frame.fbw;
}
}
// remember the old targets so that a change can be detected below
CRenderTarget* pprevrndr = prndr;
CDepthTarget* pprevdepth = pdepth;
// reset so that Resolve doesn't call Flush
prndr = NULL;
pdepth = NULL;
CRenderTarget* pnewtarg = s_RTs.GetTarg(frame, 0, maxmin);
assert( pnewtarg != NULL );
// pnewtarg->fbh >= 0x1c0 needed for ffx
if( pnewtarg->fbh >= 0x1c0 && pnewtarg->fbh > frame.fbh && zbuf.zbp < tbp && !zbuf.zmsk ) {
// check if zbuf is in the way of the texture (suikoden5)
int maxallowedfbh = (tbp-zbuf.zbp)*((zbuf.psm&2)?128:64) / gsfb.fbw;
if( gsfb.psm & 2 )
maxallowedfbh *= 2;
if( pnewtarg->fbh > maxallowedfbh+32 ) { // +32 needed for ffx2
// destroy and recreate
s_RTs.DestroyAll(0, 0x100, pnewtarg->fbw);
pnewtarg = s_RTs.GetTarg(frame, 0, maxmin);
assert( pnewtarg != NULL );
}
}
PRIM_LOG("frame_%d: fbp=0x%x fbw=%d fbh=%d(%d) psm=0x%x fbm=0x%x\n", ictx, gsfb.fbp, gsfb.fbw, gsfb.fbh, pnewtarg->fbh, gsfb.psm, gsfb.fbm);
// NOTE(review): prndr was set to NULL above, so only the first half of this condition can be true here
if( (pprevrndr != pnewtarg) || (prndr != NULL && (prndr->status & CRenderTarget::TS_NeedUpdate)) )
bChanged = 1;
prndr = pnewtarg;
// update z
// describe the depth target with the z buffer address/format and the render target's width and height
frameInfo tempfb;
tempfb.fbw = prndr->fbw;
tempfb.fbp = zbuf.zbp;
tempfb.psm = zbuf.psm;
tempfb.fbh = prndr->targheight;
if( zbuf.psm == 0x31 ) tempfb.fbm = 0xff000000;
else tempfb.fbm = 0;
// check if there is a target that exactly aligns with zbuf (zbuf can be cleared this way, gunbird 2)
//u32 key = zbuf.zbp|(frame.fbw<<16);
//CRenderTargetMngr::MAPTARGETS::iterator it = s_RTs.mapTargets.find(key);
// if( it != s_RTs.mapTargets.end() ) {
//#ifdef PCSX2_DEBUG
// DEBUG_LOG("zbuf resolve\n");
//#endif
// if( it->second->status & CRenderTarget::TS_Resolved )
// it->second->Resolve();
// }
CDepthTarget* pnewdepth = (CDepthTarget*)s_DepthRTs.GetTarg(tempfb, CRenderTargetMngr::TO_DepthBuffer |
CRenderTargetMngr::TO_StrictHeight|(zbuf.zmsk?CRenderTargetMngr::TO_Virtual:0),
prndr->targheight);//GET_MAXHEIGHT(zbuf.zbp, gsfb.fbw, 0));
assert( pnewdepth != NULL && prndr != NULL );
assert( pnewdepth->fbh == prndr->targheight );
if( (pprevdepth != pnewdepth) || (pdepth != NULL && (pdepth->status & CRenderTarget::TS_NeedUpdate)) )
bChanged |= 2;
pdepth = pnewdepth;
// pending format conversions of the render target: the height doubles when converting to 32 bit
// and halves when converting to 16 bit
if( prndr->status & CRenderTarget::TS_NeedConvert32) {
if( pdepth->pdepth != NULL )
pd3dDevice->SetDepthStencilSurface(pdepth->pdepth);
prndr->fbh *= 2;
prndr->targheight *= 2;
prndr->ConvertTo32();
prndr->status &= ~CRenderTarget::TS_NeedConvert32;
}
else if( prndr->status & CRenderTarget::TS_NeedConvert16 ) {
if( pdepth->pdepth != NULL )
pd3dDevice->SetDepthStencilSurface(pdepth->pdepth);
prndr->fbh /= 2;
prndr->targheight /= 2;
prndr->ConvertTo16();
prndr->status &= ~CRenderTarget::TS_NeedConvert16;
}
}
else if( bNeedZCheck ) {
bNeedZCheck = 0;
CDepthTarget* pprevdepth = pdepth;
pdepth = NULL;
if( prndr != NULL && gsfb.fbw > 0 ) {
// just z changed
frameInfo f;
f.fbp = zbuf.zbp;
f.fbw = prndr->fbw;
f.fbh = prndr->fbh;
f.psm = zbuf.psm;
if( zbuf.psm == 0x31 ) f.fbm = 0xff000000;
else f.fbm = 0;
CDepthTarget* pnewdepth = (CDepthTarget*)s_DepthRTs.GetTarg(f, CRenderTargetMngr::TO_DepthBuffer|CRenderTargetMngr::TO_StrictHeight|
(zbuf.zmsk?CRenderTargetMngr::TO_Virtual:0), prndr->fbh);//GET_MAXHEIGHT(zbuf.zbp, gsfb.fbw, 0));
assert( pnewdepth != NULL && prndr != NULL );
assert( pnewdepth->fbh == prndr->fbh );
if( (pprevdepth != pnewdepth) || (pdepth != NULL && (pdepth->status & CRenderTarget::TS_NeedUpdate)) )
bChanged = 2;
pdepth = pnewdepth;
}
}
s_nResolved &= 0xff; // restore
if( prndr != NULL )
SetContextTarget(ictx);
//if( prndr != NULL && ictx == icurctx)
//else bVarsSetTarg = 0;
// if( prndr != NULL && bChanged ) {
// if( ictx == icurctx ) SetContextTarget(icurctx);
// else
// bVarsSetTarg = 0;
// }
}
// Applies the pending TEX0 register words (uNextTex0Data) to this context.
// When only CLUT related bits changed, the CLUT is rewritten without decoding the texture fields
// again; otherwise pending primitives are flushed and every tex0 field is decoded from the two
// words. Must only be called while bNeedTexCheck is set.
void ZeroGS::VB::FlushTexData()
{
assert( bNeedTexCheck );
bNeedTexCheck = 0;
// pixel storage mode: bits 20-25 of the first word
u32 psm = (uNextTex0Data[0] >> 20) & 0x3f;
if( psm == 9 ) psm = 1; // hmm..., ffx intro menu
// don't update unless necessary
// (first word and the low 5 bits of the second word are unchanged)
if( uCurTex0Data[0] == uNextTex0Data[0] && (uCurTex0Data[1]&0x1f) == (uNextTex0Data[1]&0x1f) ) {
if( PSMT_ISCLUT(psm) ) {
// have to write the CLUT again if changed
// bits 5-28 of the second word identical: only the top three bits (decoded as cld below) may differ
if( (uCurTex0Data[1]&0x1fffffe0) == (uNextTex0Data[1]&0x1fffffe0) ) {
if( uNextTex0Data[1]&0xe0000000 ) {
//ZeroGS::Flush(ictx);
ZeroGS::texClutWrite(ictx);
// invalidate to make sure target didn't change!
bVarsTexSync = FALSE;
}
return;
}
// top three bits are zero
if( (uNextTex0Data[1]&0xe0000000) == 0 ) {
if( (uCurTex0Data[1]&0x1ff10000) != (uNextTex0Data[1]&0x1ff10000) )
ZeroGS::Flush(ictx);
// clut isn't going to be loaded so can ignore, but at least update CSA and CPSM!
uCurTex0Data[1] = (uCurTex0Data[1]&0xe087ffff)|(uNextTex0Data[1]&0x1f780000);
// NOTE(review): csa is masked according to the old tex0.cpsm here because cpsm is only
// updated on the line after; the full decode at the end of this function sets cpsm first.
// Confirm whether this ordering is intended.
if( tex0.cpsm <= 1 ) tex0.csa = (uNextTex0Data[1] >> 24) & 0xf;
else tex0.csa = (uNextTex0Data[1] >> 24) & 0x1f;
tex0.cpsm = (uNextTex0Data[1] >> 19) & 0xe;
ZeroGS::texClutWrite(ictx);
bVarsTexSync = FALSE;
return;
}
// fall through
}
else {
//bVarsTexSync = FALSE;
return;
}
}
// full update: draw what is pending with the old settings, then decode every field
ZeroGS::Flush(ictx);
bVarsTexSync = FALSE;
bTexConstsSync = FALSE;
uCurTex0Data[0] = uNextTex0Data[0];
uCurTex0Data[1] = uNextTex0Data[1];
tex0.tbp0 = (uNextTex0Data[0] & 0x3fff);
tex0.tbw = ((uNextTex0Data[0] >> 14) & 0x3f) * 64;
tex0.psm = psm;
// width and height are stored as exponents; the exponent is clamped to 10 (1024 texels)
tex0.tw = (uNextTex0Data[0] >> 26) & 0xf;
if (tex0.tw > 10) tex0.tw = 10;
tex0.tw = 1<<tex0.tw;
// the height exponent straddles both words
tex0.th = ((uNextTex0Data[0] >> 30) & 0x3) | ((uNextTex0Data[1] & 0x3) << 2);
if (tex0.th > 10) tex0.th = 10;
tex0.th = 1<<tex0.th;
tex0.tcc = (uNextTex0Data[1] >> 2) & 0x1;
tex0.tfx = (uNextTex0Data[1] >> 3) & 0x3;
ZeroGS::fiTexWidth[ictx] = (1/16.0f)/ tex0.tw;
ZeroGS::fiTexHeight[ictx] = (1/16.0f) / tex0.th;
// a buffer width of 0 is replaced by 64
if (tex0.tbw == 0) tex0.tbw = 64;
if( PSMT_ISCLUT(psm) ) {
tex0.cbp = ((uNextTex0Data[1] >> 5) & 0x3fff);
tex0.cpsm = (uNextTex0Data[1] >> 19) & 0xe;
tex0.csm = (uNextTex0Data[1] >> 23) & 0x1;
if( tex0.cpsm <= 1 ) tex0.csa = (uNextTex0Data[1] >> 24) & 0xf;
else tex0.csa = (uNextTex0Data[1] >> 24) & 0x1f;
tex0.cld = (uNextTex0Data[1] >> 29) & 0x7;
ZeroGS::texClutWrite(ictx);
}
}
// does one time only initializing/destruction
class ZeroGSInit
{
public:
ZeroGSInit() {
// clear
g_pbyGSMemory = (u8*)_aligned_malloc(0x00410000, 1024); // leave some room for out of range accesses (saves on the checks)
memset(g_pbyGSMemory, 0, 0x00410000);
g_pbyGSClut = (u8*)_aligned_malloc(256*8, 1024); // need 512 alignment!
memset(g_pbyGSClut, 0, 256*8);
}
~ZeroGSInit() {
_aligned_free(g_pbyGSMemory); g_pbyGSMemory = NULL;
_aligned_free(g_pbyGSClut); g_pbyGSClut = NULL;
}
};
static ZeroGSInit s_ZeroGSInit;
// Creates the Direct3D 9 interface and device for a back buffer of _width x _height, checks the
// device capabilities, creates the font and vertex declaration, loads the shaders, uploads the
// global shader constants and installs the Kick* draw functions.
// Any previous device is torn down first through Destroy(1) and the GS state is reset.
// Returns S_OK on success, otherwise E_FAIL or the failing HRESULT of the step that went wrong.
HRESULT ZeroGS::Create(LONG _width, LONG _height)
{
Destroy(1);
GSStateReset();
width = _width;
height = _height;
fiRendWidth = 1.0f / width;
fiRendHeight = 1.0f / height;
HRESULT hr;
if( NULL == (pD3D = Direct3DCreate9(D3D_SDK_VERSION)) ) {
ERROR_LOG(_T("Failed to create the direct3d interface."));
return E_FAIL;
}
D3DDISPLAYMODE d3ddm;
if( FAILED( hr = pD3D->GetAdapterDisplayMode( D3DADAPTER_DEFAULT, &d3ddm ) ) ) {
ERROR_LOG(_T("Error geting default adapter."));
return hr;
}
if( conf.options & GSOPTION_FULLSCREEN ) {
// choose best mode
// RECT rcdesktop;
// GetWindowRect(GetDesktopWindow(), &rcdesktop);
// width = rcdesktop.right - rcdesktop.left;
// height = rcdesktop.bottom - rcdesktop.top;
// width = height = 0;
// D3DDISPLAYMODE d3ddmtemp;
//
// int modes = pD3D->GetAdapterModeCount(D3DADAPTER_DEFAULT, d3ddm.Format);
// for(int i= 0; i < modes; ++i) {
// pD3D->EnumAdapterModes(D3DADAPTER_DEFAULT, d3ddm.Format, i, &d3ddmtemp);
//
// if( abs(1024-(int)d3ddmtemp.Width) <= abs(1280-width) && abs(768-(int)d3ddmtemp.Height) <= abs(1024-height) ) {
// width = d3ddmtemp.Width;
// height = d3ddmtemp.Height;
// }
// }
}
else {
// change to default resolution
ChangeDisplaySettings(NULL, 0);
}
// Set up the structure used to create the D3DDevice. Since we are now
// using more complex geometry, we will create a device with a zbuffer.
ZeroMemory( &d3dpp, sizeof(d3dpp) );
d3dpp.Windowed = !(conf.options & GSOPTION_FULLSCREEN);
d3dpp.hDeviceWindow = GShwnd;
d3dpp.SwapEffect = (conf.options & GSOPTION_FULLSCREEN) ? D3DSWAPEFFECT_FLIP : D3DSWAPEFFECT_DISCARD;
d3dpp.BackBufferFormat = D3DFMT_A8R8G8B8;
d3dpp.EnableAutoDepthStencil = TRUE;
d3dpp.AutoDepthStencilFormat = D3DFMT_D24S8;
d3dpp.BackBufferWidth = width;
d3dpp.BackBufferHeight = height;
d3dpp.PresentationInterval = D3DPRESENT_INTERVAL_IMMEDIATE;//(conf.options & GSOPTION_FULLSCREEN) ? D3DPRESENT_INTERVAL_DEFAULT : D3DPRESENT_INTERVAL_IMMEDIATE;
d3dpp.Flags = DEBUG_PS2 ? D3DPRESENTFLAG_LOCKABLE_BACKBUFFER : 0;
s_nFullscreen = (conf.options & GSOPTION_FULLSCREEN) ? 1 : 0;
// Create the D3DDevice
UINT adapter = D3DADAPTER_DEFAULT;
D3DDEVTYPE devtype = !DEBUG_PS2 ? D3DDEVTYPE_HAL : D3DDEVTYPE_REF;
#ifndef PCSX2_DEBUG
DWORD hwoptions = D3DCREATE_HARDWARE_VERTEXPROCESSING|D3DCREATE_PUREDEVICE;
#else
DWORD hwoptions = D3DCREATE_HARDWARE_VERTEXPROCESSING;
#endif
#ifndef RELEASE_TO_PUBLIC
// in non-public builds, prefer the NVPerfHUD adapter when one is present
for(UINT i = 0; i < pD3D->GetAdapterCount(); ++i) {
D3DADAPTER_IDENTIFIER9 id;
// NOTE(review): this hr shadows the outer one and is not checked before id.Description is read
HRESULT hr = pD3D->GetAdapterIdentifier(i, 0, &id);
if( strcmp(id.Description, "NVIDIA NVPerfHUD") == 0 ) {
DEBUG_LOG("Using %s adapter\n", id.Description);
adapter = i;
devtype = D3DDEVTYPE_REF;
break;
}
}
#endif
// three attempts: the requested device, then HAL with software vertex processing, then the
// reference rasterizer
if( FAILED( hr = pD3D->CreateDevice( adapter, devtype, GShwnd,
!DEBUG_PS2 ? hwoptions : D3DCREATE_SOFTWARE_VERTEXPROCESSING, &d3dpp, &pd3dDevice ) ) )
{
ERROR_LOG(_T("Failed to create hardware device, creating software.\n"));
if( FAILED( hr = pD3D->CreateDevice( D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, GShwnd,
D3DCREATE_SOFTWARE_VERTEXPROCESSING,
&d3dpp, &pd3dDevice ) ) )
{
ERROR_LOG(_T("Failed to create software device, switching to reference rasterizer.\n"));
if( FAILED( hr = pD3D->CreateDevice( D3DADAPTER_DEFAULT, D3DDEVTYPE_REF, GShwnd,
D3DCREATE_SOFTWARE_VERTEXPROCESSING,
&d3dpp, &pd3dDevice ) ) )
return hr;
}
}
// get caps and check if gfx card is ok
D3DCAPS9 caps;
pd3dDevice->GetDeviceCaps(&caps);
// vs2.0 is the only hard requirement checked here; the remaining capability checks only warn
if( caps.VertexShaderVersion < D3DVS_VERSION(2,0) ) {
ERROR_LOG("*********\nGS ERROR: Need at least vs2.0\n*********\n");
Destroy(1);
return E_FAIL;
}
conf.mrtdepth = 1;
if( caps.NumSimultaneousRTs == 1 ) {
ERROR_LOG("*********\nGS WARNING: Need at least 2 simultaneous render targets. Some zbuffer effects will look wrong\n*********\n");
conf.mrtdepth = 0;
}
if( !(caps.PrimitiveMiscCaps & D3DPMISCCAPS_SEPARATEALPHABLEND) ) {
ERROR_LOG("*********\nGS ERROR: Need separate alpha blending! Some effects will look bad\n*********\n");
}
if( !(caps.PrimitiveMiscCaps & D3DPMISCCAPS_INDEPENDENTWRITEMASKS) ) {
ERROR_LOG("******\nGS WARNING: Need independent write masks! Some z buffer effects might look bad\n*********\n");
bIndepWriteMasks = 0;
}
if( !(caps.PrimitiveMiscCaps & D3DPMISCCAPS_MRTPOSTPIXELSHADERBLENDING) ) {
ERROR_LOG("******\nGS WARNING: Need MRT Post Pixel Shader Blending for some effects\n*********\n");
}
// fall back to 32 bit render targets when the game setting asks for it or when the current
// render format cannot be blended
// NOTE(review): the format check always queries the default HAL adapter, even if another
// adapter/device type was selected above
hr = pD3D->CheckDeviceFormat( D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, d3ddm.Format,
D3DUSAGE_RENDERTARGET|D3DUSAGE_QUERY_POSTPIXELSHADER_BLENDING, D3DRTYPE_TEXTURE, g_RenderFormat);
if( g_GameSettings & GAME_32BITTARGS ) {
g_RenderFormat = D3DFMT_A8R8G8B8;
ERROR_LOG("Setting 32 bit render target\n");
}
else if( FAILED(hr) ) {
ERROR_LOG("******\nGS ERROR: Device doesn't support alpha blending for 16bit floating point targets.\nQuality will reduce.\n*********\n");
g_RenderFormat = D3DFMT_A8R8G8B8;
}
// hr = pD3D->CheckDeviceFormat( D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, d3ddm.Format, 0, D3DRTYPE_TEXTURE, D3DFMT_G32R32F);
//
// if( FAILED(hr) ) {
// ERROR_LOG("******\nGS ERROR: Device doesn't support G32R32F textures.\nTextures will look bad.\n*********\n");
// }
g_MaxTexWidth = caps.MaxTextureWidth;
g_MaxTexHeight = caps.MaxTextureHeight;
GPU_TEXWIDTH = caps.MaxTextureWidth/8;
g_fiGPU_TEXWIDTH = 1.0f / GPU_TEXWIDTH;
//g_RenderFormat = D3DFMT_A8R8G8B8;
// keep the original back buffer and depth surface; both are released in Destroy()
pd3dDevice->GetRenderTarget(0, &psurfOrgTarg);
pd3dDevice->GetDepthStencilSurface(&psurfOrgDepth);
SETRS(D3DRS_ZENABLE, TRUE);
SETRS(D3DRS_LIGHTING, FALSE);
SETRS(D3DRS_SPECULARENABLE, FALSE);
V_RETURN( D3DXCreateFont( pd3dDevice, 15, 0, FW_BOLD, 1, FALSE, DEFAULT_CHARSET,
OUT_DEFAULT_PRECIS, DEFAULT_QUALITY, DEFAULT_PITCH | FF_DONTCARE,
"Arial", &pFont ) );
// create the vertex decl
// one stream: SHORT4N position at offset 0, D3DCOLOR at 8, UBYTE4 texcoord0 at 12, FLOAT3 texcoord1 at 16
const D3DVERTEXELEMENT9 Decl[] = {
{ 0, 0, D3DDECLTYPE_SHORT4N, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION, 0 },
{ 0, 8, D3DDECLTYPE_D3DCOLOR, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_COLOR, 0 },
{ 0, 12, D3DDECLTYPE_UBYTE4, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 0 },
{ 0, 16, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 1 },
D3DDECL_END() };
V_RETURN(pd3dDevice->CreateVertexDeclaration(Decl, &pdecl));
#ifdef RELEASE_TO_PUBLIC
// public builds read the shaders from the IDR_SHADERS resource of the module
HRSRC hShaderSrc = FindResource(hInst, MAKEINTRESOURCE(IDR_SHADERS), RT_RCDATA);
assert( hShaderSrc != NULL );
HGLOBAL hShaderGlob = LoadResource(hInst, hShaderSrc);
assert( hShaderGlob != NULL );
s_lpShaderResources = (BYTE*)LockResource(hShaderGlob);
#endif
// load the effect
ERROR_LOG("Creating effects\n");
V_RETURN(LoadEffects());
// g_bDisplayMsg is cleared here and set again after the shader constants have been uploaded
g_bDisplayMsg = 0;
// initial pixel shader profile guess from the caps
if( caps.VertexShaderVersion >= D3DVS_VERSION(3,0) && caps.PixelShaderVersion >= D3DPS_VERSION(2,0) )
g_nPixelShaderVer = SHADER_30;
else if( caps.PixelShaderVersion == D3DPS_VERSION(2,0) )
g_nPixelShaderVer = SHADER_20;
else
g_nPixelShaderVer = SHADER_20a;
#ifdef RELEASE_TO_PUBLIC
// create a sample shader
// step down through the profiles until a sample shader can be loaded
clampInfo temp;
memset(&temp, 0, sizeof(temp));
temp.wms = 3; temp.wmt = 3;
if( g_nPixelShaderVer != SHADER_30 ) {
// test more
if( LoadShadeEffect(0, 1, 1, 1, 1, temp, 0) == NULL ) {
g_nPixelShaderVer = SHADER_20b;
if( LoadShadeEffect(0, 1, 1, 1, 1, temp, 0) == NULL ) {
g_nPixelShaderVer = SHADER_20;
if( LoadShadeEffect(0, 0, 1, 1, 0, temp, 0) == NULL ) {
ERROR_LOG("*********\nGS ERROR: Need at least ps2.0 (ps2.0a+ recommended)\n*********\n");
Destroy(1);
return E_FAIL;
}
}
}
}
#endif
// set global shader constants
pd3dDevice->SetPixelShaderConstantF(27, DXVEC4(0.5f, (g_GameSettings&GAME_EXACTCOLOR)?0.9f/256.0f:0.5f/256.0f, 0,1/255.0f), 1); // g_fExactColor
pd3dDevice->SetPixelShaderConstantF(28, DXVEC4(-0.7f, -0.65f, 0.9f,0), 1); // g_fBilinear
pd3dDevice->SetPixelShaderConstantF(29, DXVEC4(1.0f/256.0f, 1.0004f, 1, 0.5f), 1); // g_fZBias
pd3dDevice->SetPixelShaderConstantF(30, DXVEC4(0,1, 0.001f, 0.5f), 1); // g_fc0
pd3dDevice->SetPixelShaderConstantF(31, DXVEC4(1/1024.0f, 0.2f/1024.0f, 1/128.0f, 1/512.0f), 1); // g_fMult
pd3dDevice->SetVertexShaderConstantF(29, DXVEC4(1.0f/256.0f, 1.0004f, 1, 0.5f), 1); // g_fZBias
pd3dDevice->SetVertexShaderConstantF(30, DXVEC4(0,1, 0.001f, 0.5f), 1); // g_fc0
pd3dDevice->SetVertexShaderConstantF(31, DXVEC4(0.5f, -0.5f, 0.5f, 0.5f + 0.4f/416.0f), 1); // g_fBitBltTrans
g_bDisplayMsg = 1;
// bilinear filtering is switched off for the plain ps2.0 profile
if( g_nPixelShaderVer == SHADER_20 )
conf.bilinear = 0;
ERROR_LOG("Creating extra effects\n");
V_RETURN(LoadExtraEffects());
ERROR_LOG("GS Using pixel shaders %s\n", g_pShaders[g_nPixelShaderVer]);
pd3dDevice->Clear(0, NULL, D3DCLEAR_TARGET|D3DCLEAR_STENCIL|D3DCLEAR_ZBUFFER, 0, 1, 0);
// init draw fns
drawfn[0] = KickPoint;
drawfn[1] = KickLine;
drawfn[2] = KickLine;
drawfn[3] = KickTriangle;
drawfn[4] = KickTriangle;
drawfn[5] = KickTriangleFan;
drawfn[6] = KickSprite;
drawfn[7] = KickDummy;
SetAA(conf.aa);
GSsetGameCRC(g_LastCRC, g_GameSettings);
return S_OK;
}
// Releases the device objects, both contexts, all shaders, the vertex declaration, the font and
// the saved back buffer surfaces.
// bD3D: when nonzero, the Direct3D device and the Direct3D interface are released as well.
void ZeroGS::Destroy(BOOL bD3D)
{
    DeleteDeviceObjects();

    for(int ctx = 0; ctx < 2; ++ctx)
        vb[ctx].Destroy();

    // shader tables
    for(int v = 0; v < ArraySize(pvs); ++v) { SAFE_RELEASE(pvs[v]); }
    for(int r = 0; r < ArraySize(ppsRegular); ++r) { SAFE_RELEASE(ppsRegular[r]); }
    for(int t = 0; t < ArraySize(ppsTexture); ++t) { SAFE_RELEASE(ppsTexture[t]); }

    // bitblt shaders
    SAFE_RELEASE(pvsBitBlt);
    SAFE_RELEASE(pvsBitBlt30);
    for(int a = 0; a < 2; ++a) { SAFE_RELEASE(ppsBitBlt[a]); }
    for(int b = 0; b < 2; ++b) { SAFE_RELEASE(ppsBitBltDepth[b]); }
    for(int c = 0; c < 2; ++c) { SAFE_RELEASE(ppsBitBltDepthTex[c]); }

    // CRTC shaders
    for(int d = 0; d < 2; ++d) { SAFE_RELEASE(ppsCRTCTarg[d]); }
    for(int e = 0; e < 2; ++e) { SAFE_RELEASE(ppsCRTC[e]); }
    for(int f = 0; f < 2; ++f) { SAFE_RELEASE(ppsCRTC24[f]); }

    SAFE_RELEASE(ppsOne);
    SAFE_RELEASE(pdecl);
    SAFE_RELEASE(pFont);
    SAFE_RELEASE(psurfOrgTarg);
    SAFE_RELEASE(psurfOrgDepth);

    // the device itself survives unless the caller asked for a full teardown
    if( !bD3D )
        return;

    SAFE_RELEASE(pd3dDevice);
    SAFE_RELEASE(pD3D);
}
void ZeroGS::GSStateReset()
{
icurctx = -1;
for(int i = 0; i < 2; ++i) {
LPD3DVB pvb = vb[i].pvb;
if( pvb != NULL ) pvb->AddRef();
vb[i].Destroy();
memset(&vb[i], 0, sizeof(VB));
vb[i].tex0.tw = 1;
vb[i].tex0.th = 1;
vb[i].pvb = pvb;
vb[i].scissor.x1 = 639;
vb[i].scissor.y1 = 479;
vb[i].tex0.tbw = 64;
}
s_RangeMngr.Clear();
g_MemTargs.Destroy();
s_RTs.Destroy();
s_DepthRTs.Destroy();
s_BitwiseTextures.Destroy();
vb[0].ictx = 0;
vb[1].ictx = 1;
s_bAlphaSet = FALSE;
}
void ZeroGS::AddMessage(const char* pstr, DWORD ms)
{
listMsgs.push_back(MESSAGE(pstr, timeGetTime()+ms));
}
// Records a new window size. The renderer's width/height never drop below 16;
// when not in fullscreen the raw (unclamped) values are also stored in conf.
void ZeroGS::ChangeWindowSize(int nNewWidth, int nNewHeight)
{
	width = 16;
	if( nNewWidth > 16 )
		width = nNewWidth;

	height = 16;
	if( nNewHeight > 16 )
		height = nNewHeight;

	if( conf.options & GSOPTION_FULLSCREEN )
		return;

	conf.width = nNewWidth;
	conf.height = nNewHeight;
	//SaveConfig();
}
// Stores the requested device size in s_nNewWidth/s_nNewHeight and, when not
// in fullscreen, mirrors it into conf.
void ZeroGS::SetChangeDeviceSize(int nNewWidth, int nNewHeight)
{
	s_nNewWidth = nNewWidth;
	s_nNewHeight = nNewHeight;

	const bool bWindowed = !(conf.options & GSOPTION_FULLSCREEN);
	if( bWindowed ) {
		conf.width = nNewWidth;
		conf.height = nNewHeight;
		//SaveConfig();
	}
}
// Resolves all render targets, resets the GS state, destroys everything except
// the D3D device itself and routes every primitive type to KickDummy.
void ZeroGS::Reset()
{
	s_RTs.ResolveAll();
	s_DepthRTs.ResolveAll();

	for(int ctx = 0; ctx < 2; ++ctx)
		vb[ctx].Unlock();

	memset(s_nResolveCounts, 0, sizeof(s_nResolveCounts));
	s_nLastResolveReset = 0;

	icurctx = -1;

	GSStateReset();
	Destroy(0);	// keep the device and the IDirect3D9 object

	// all eight primitive slots become no-ops
	for(int slot = 0; slot < 8; ++slot)
		drawfn[slot] = KickDummy;
}
void ZeroGS::ChangeDeviceSize(int nNewWidth, int nNewHeight)
{
int oldscreen = s_nFullscreen;
int oldwidth = width, oldheight = height;
if( FAILED(Create(nNewWidth&~7, nNewHeight&~7)) ) {
DEBUG_LOG("Failed to recreate, changing to old\n");
if( FAILED(Create(oldwidth, oldheight)) ) {
MessageBox(NULL, "failed to create dev, exiting...\n", "Error", MB_OK);
exit(0);
}
}
if( FAILED(InitDeviceObjects()) ) {
MessageBox(NULL, "failed to init dev objs, exiting...\n", "Error", MB_OK);
exit(0);
}
for(int i = 0; i < 2; ++i) {
vb[i].bNeedFrameCheck = vb[i].bNeedZCheck = 1;
vb[i].CheckFrame(0);
}
if( oldscreen && !(conf.options & GSOPTION_FULLSCREEN) ) { // if transitioning from full screen
RECT rc;
rc.left = 0; rc.top = 0;
rc.right = conf.width; rc.bottom = conf.height;
AdjustWindowRect(&rc, conf.winstyle, FALSE);
RECT rcdesktop;
GetWindowRect(GetDesktopWindow(), &rcdesktop);
SetWindowLong( GShwnd, GWL_STYLE, conf.winstyle );
SetWindowPos(GShwnd, HWND_TOP, ((rcdesktop.right-rcdesktop.left)-(rc.right-rc.left))/2,
((rcdesktop.bottom-rcdesktop.top)-(rc.bottom-rc.top))/2,
rc.right-rc.left, rc.bottom-rc.top, SWP_SHOWWINDOW);
UpdateWindow(GShwnd);
}
vb[0].Lock();
vb[1].Lock();
assert( vb[0].pbuf != NULL && vb[1].pbuf != NULL );
}
// Switches the anti-aliasing mode. All color and depth targets are resolved and
// destroyed first, then the s_AAx/s_AAy values are derived from mode and both
// contexts are flagged to re-acquire their targets.
void ZeroGS::SetAA(int mode)
{
	// need to flush all targets
	s_RTs.ResolveAll();
	s_RTs.Destroy();
	s_DepthRTs.ResolveAll();
	s_DepthRTs.Destroy();

	if( mode > 0 ) {
		s_AAx = (mode+1) / 2;
		s_AAy = mode / 2;
	}
	else {
		s_AAy = 0;
		s_AAx = 0;
	}

	memset(s_nResolveCounts, 0, sizeof(s_nResolveCounts));
	s_nLastResolveReset = 0;

	for(int ctx = 0; ctx < 2; ++ctx) {
		vb[ctx].prndr = NULL;
		vb[ctx].pdepth = NULL;
		vb[ctx].bNeedFrameCheck = 1;
		vb[ctx].bNeedZCheck = 1;
	}

	// points are drawn twice as large whenever AA is active
	float pointsize = 1.0f;
	if( mode > 0 )
		pointsize = 2.0f;
	SETRS(D3DRS_POINTSIZE, FtoDW(pointsize));
}
#ifdef RELEASE_TO_PUBLIC
// Creates the vertex shader registered under Index in mapShaderResources from
// the precompiled data at s_lpShaderResources and stores it in ptr.
// The expanding function must declare `HRESULT hr` and `SHADERHEADER* header`;
// it returns E_FAIL from that function when the shader cannot be created.
// The final line deliberately carries no line continuation so the macro cannot
// swallow whatever follows it.
#define LOAD_VS(Index, ptr) { \
	assert( mapShaderResources.find(Index) != mapShaderResources.end() ); \
	header = mapShaderResources[Index]; \
	assert( (header) != NULL && (header)->index == (Index) ); \
	hr = pd3dDevice->CreateVertexShader((DWORD*)(s_lpShaderResources + (header)->offset), &(ptr)); \
	if( FAILED(hr) || (ptr) == NULL ) { \
		DEBUG_LOG("errors 0x%x for %d, failed.. try updating your drivers or dx\n", hr, Index); \
		return E_FAIL; \
	} \
}
// Creates the pixel shader registered under index in mapShaderResources from
// the precompiled data at s_lpShaderResources and stores it in ptr.
// The expanding function must declare `HRESULT hr` and `SHADERHEADER* header`;
// it returns E_FAIL from that function when the shader cannot be created.
// index is an integer id, so it is logged with %d (the same as LOAD_VS).
#define LOAD_PS(index, ptr) { \
	assert( mapShaderResources.find(index) != mapShaderResources.end() ); \
	header = mapShaderResources[index]; \
	assert( (header) != NULL ); \
	hr = pd3dDevice->CreatePixelShader((DWORD*)(s_lpShaderResources + (header)->offset), &(ptr)); \
	if( FAILED(hr) || (ptr) == NULL ) { \
		DEBUG_LOG("errors 0x%x for %d, failed.. try updating your drivers or dx\n", hr, index); \
		return E_FAIL; \
	} \
}
// Indexes the precompiled shader resource and empties the texture-shader cache.
// As read here, the resource starts with a DWORD record count followed by that
// many SHADERHEADER records; each record is stored in mapShaderResources under
// its own index. Always returns S_OK.
HRESULT ZeroGS::LoadEffects()
{
	assert( s_lpShaderResources != NULL );

	// process the header
	const DWORD count = *(DWORD*)s_lpShaderResources;
	SHADERHEADER* entry = (SHADERHEADER*)((BYTE*)s_lpShaderResources + 4);

	mapShaderResources.clear();
	for(DWORD n = 0; n < count; ++n, ++entry)
		mapShaderResources[entry->index] = entry;

	// drop every cached texture shader
	const int numcached = ArraySize(ppsTexture);
	for(int slot = 0; slot < numcached; ++slot) {
		SAFE_RELEASE(ppsTexture[slot]);
	}
	memset(ppsTexture, 0, sizeof(ppsTexture));

	return S_OK;
}
// Creates all fixed (non texture-cache) vertex and pixel shaders from the
// precompiled shader resource. Returns E_FAIL as soon as one shader cannot be
// created (via the LOAD_VS / LOAD_PS macros), S_OK otherwise.
HRESULT ZeroGS::LoadExtraEffects()
{
	HRESULT hr;				// used by LOAD_VS / LOAD_PS
	SHADERHEADER* header;	// used by LOAD_VS / LOAD_PS

	// shader model 3.0 variants are selected by OR-ing SH_30 into the index
	DWORD mask = g_nPixelShaderVer == SHADER_30 ? SH_30 : 0;

	const int vsshaders[4] = { SH_REGULARVS, SH_TEXTUREVS, SH_REGULARFOGVS, SH_TEXTUREFOGVS };

	// pvs layout: [2*i + context] without depth write, +8 with depth write
	for(int i = 0; i < 4; ++i) {
		LOAD_VS(vsshaders[i]|mask, pvs[2*i]);
		LOAD_VS(vsshaders[i]|mask|SH_CONTEXT1, pvs[2*i+1]);
		LOAD_VS(vsshaders[i]|mask|SH_WRITEDEPTH, pvs[2*i+8]);
		LOAD_VS(vsshaders[i]|mask|SH_WRITEDEPTH|SH_CONTEXT1, pvs[2*i+8+1]);
	}

	LOAD_VS(SH_BITBLTVS, pvsBitBlt);
	//LOAD_VS(SH_BITBLTVS|SH_30, pvsBitBlt30);

	LOAD_PS(SH_REGULARPS|mask, ppsRegular[0]);
	LOAD_PS(SH_REGULARFOGPS|mask, ppsRegular[1]);
	LOAD_PS(SH_REGULARPS|SH_WRITEDEPTH|mask, ppsRegular[2]);
	LOAD_PS(SH_REGULARFOGPS|SH_WRITEDEPTH|mask, ppsRegular[3]);

	// the AA variant belongs in slot 1; loading it into slot 0 overwrote (and
	// leaked) the plain bitblt shader and left ppsBitBlt[1] unset
	LOAD_PS(SH_BITBLTPS, ppsBitBlt[0]); LOAD_PS(SH_BITBLTAAPS, ppsBitBlt[1]);
	LOAD_PS(SH_BITBLTDEPTHPS, ppsBitBltDepth[0]); LOAD_PS(SH_BITBLTDEPTHMRTPS, ppsBitBltDepth[1]);
	LOAD_PS(SH_BITBLTDEPTHTEXPS, ppsBitBltDepthTex[0]); LOAD_PS(SH_BITBLTDEPTHTEXMRTPS, ppsBitBltDepthTex[1]);
	LOAD_PS(SH_CRTCTARGPS, ppsCRTCTarg[0]); LOAD_PS(SH_CRTCTARGINTERPS, ppsCRTCTarg[1]);
	LOAD_PS(SH_CRTCPS, ppsCRTC[0]); LOAD_PS(SH_CRTCINTERPS, ppsCRTC[1]);
	LOAD_PS(SH_CRTC24PS, ppsCRTC24[0]); LOAD_PS(SH_CRTC24INTERPS, ppsCRTC24[1]);
	LOAD_PS(SH_ZEROPS|mask, ppsOne);
	LOAD_PS(SH_BASETEXTUREPS, ppsBaseTexture);
	LOAD_PS(SH_CONVERT16TO32PS, ppsConvert16to32);
	LOAD_PS(SH_CONVERT32TO16PS, ppsConvert32to16);

	return S_OK;
}
// Returns the texture pixel shader matching the given state, creating it from
// the precompiled shader resource on first use and caching it in ppsTexture.
//   type, fog, testaem, exactcolor, context - forwarded to GET_SHADER_INDEX
//   texfilter  - filter variant, must be < NUM_FILTERS (forced to 0 on SHADER_20)
//   clamp      - wms/wmt wrap modes, folded into a single TEXWRAP_* value
// Returns NULL when the shader is missing from the resource or cannot be created.
LPD3DPS ZeroGS::LoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context)
{
	int texwrap;
	assert( texfilter < NUM_FILTERS );

	// the SHADER_20 path has neither filtered nor exact-color variants
	if( g_nPixelShaderVer == SHADER_20 ) {
		texfilter = 0;
		exactcolor = 0;
	}

	if( clamp.wms == clamp.wmt ) {
		switch( clamp.wms ) {
			case 0: texwrap = TEXWRAP_REPEAT; break;
			case 1: texwrap = TEXWRAP_CLAMP; break;
			case 2: texwrap = TEXWRAP_CLAMP; break;
			default: texwrap = TEXWRAP_REGION_REPEAT; break;
		}
	}
	else if( clamp.wms==3||clamp.wmt==3)
		texwrap = TEXWRAP_REGION_REPEAT;
	else
		texwrap = TEXWRAP_REPEAT_CLAMP;

	int index = GET_SHADER_INDEX(type, texfilter, texwrap, fog, s_bWriteDepth, testaem, exactcolor, context, 0);
	assert( index < ArraySize(ppsTexture) );

	LPD3DPS* pps = ppsTexture+index;
	if( *pps != NULL )
		return *pps;

	// resource indices are offset by the shader version
	index += NUM_SHADERS*g_nPixelShaderVer;
	assert( mapShaderResources.find(index) != mapShaderResources.end() );

	SHADERHEADER* header = mapShaderResources[index];
	if( header == NULL ) {
		DEBUG_LOG("%d %d\n", index%NUM_SHADERS, g_nPixelShaderVer);
		assert( header != NULL );
		// with asserts compiled out, fail cleanly instead of dereferencing NULL
		return NULL;
	}

	HRESULT hr = pd3dDevice->CreatePixelShader((DWORD*)(s_lpShaderResources + header->offset), pps);
	if( SUCCEEDED(hr) )
		return *pps;

	if( g_bDisplayMsg )
		ERROR_LOG("Failed to create shader %d,%d,%d,%d\n", type, fog, texfilter, 4*clamp.wms+clamp.wmt);

	return NULL;
}
#else // not RELEASE_TO_PUBLIC
// Directory prefix for the shader sources: it is prepended to "ps2hw.fx" and to
// the include files opened by ZeroGSShaderInclude. The commented-out variant
// below is an alternative absolute path.
//#define EFFECT_NAME "f:\\ps2dev\\pcsx2\\zerogs\\dx\\"
#define EFFECT_NAME ".\\"
// Expands to nothing.
#define COMPILE_SHADER(name, type, flags)
// ID3DXInclude implementation passed to the D3DX shader compiler so that
// #include directives inside the .fx sources are resolved relative to
// EFFECT_NAME, and so that any include naming "ps2hw_ctx" is redirected to the
// file of the currently selected context.
class ZeroGSShaderInclude : public ID3DXInclude
{
public:
	int context;	// number substituted into "ps2hw_ctx%d.fx"

	// Loads the requested include file into a new[]-allocated, zero-terminated
	// buffer. *ppData receives the buffer and *pBytes its length (without the
	// terminator). Returns E_FAIL if the file cannot be opened, measured or
	// read completely; nothing is leaked in that case.
	STDMETHOD(Open)(D3DXINCLUDE_TYPE IncludeType, LPCSTR pFileName, LPCVOID pParentData, LPCVOID *ppData, UINT *pBytes)
	{
		const char* pfilename = pFileName;
		char strfile[255];

		if( strstr(pFileName, "ps2hw_ctx") != NULL ) {
			_snprintf(strfile, sizeof(strfile), "%sps2hw_ctx%d.fx", EFFECT_NAME, context);
			strfile[sizeof(strfile)-1] = 0;	// _snprintf does not terminate on truncation
			pfilename = strfile;
		}
		else if( strstr(pFileName, "\\") == NULL ) {
			// plain file name without a directory: look next to the effect file
			_snprintf(strfile, sizeof(strfile), "%s%s", EFFECT_NAME, pFileName);
			strfile[sizeof(strfile)-1] = 0;	// _snprintf does not terminate on truncation
			pfilename = strfile;
		}

		FILE* f = fopen(pfilename, "rb");
		if( f == NULL )
			return E_FAIL;

		// measure the file; ftell reports failure with -1, which must not be
		// turned into a huge unsigned size
		long len = -1;
		if( fseek(f, 0, SEEK_END) == 0 )
			len = ftell(f);
		if( len < 0 || fseek(f, 0, SEEK_SET) != 0 ) {
			fclose(f);
			return E_FAIL;
		}

		const DWORD size = (DWORD)len;
		char* buffer = new char[size+1];

		if( size > 0 && fread(buffer, 1, size, f) != size ) {
			// short read: do not hand a partially filled buffer to the compiler
			delete[] buffer;
			fclose(f);
			return E_FAIL;
		}
		fclose(f);

		buffer[size] = 0;
		*ppData = buffer;
		*pBytes = size;
		return S_OK;
	}

	// Frees a buffer previously returned by Open().
	STDMETHOD(Close)(LPCVOID pData)
	{
		delete[] (char*)pData;
		return S_OK;
	}
};
// Compiles entry point `name` of ps2hw.fx for profile `shaderver` and creates
// the vertex shader in `ptr`, releasing any shader `ptr` already held (so that
// reloading the effects does not leak).
// The expanding function must provide `hr`, `pmacros`, `pInclude` and
// `ShaderFlagsVS`; it returns the failing HRESULT when compilation fails and
// E_FAIL when the shader object cannot be created.
// The compiler may fail without producing an error buffer (pError stays NULL),
// so the buffer is only read when present.
#define LOAD_VS(name, ptr, shaderver) { \
	LPD3DXBUFFER pShader = NULL, pError = NULL; \
	SAFE_RELEASE(ptr); \
	V(D3DXCompileShaderFromFile(EFFECT_NAME"ps2hw.fx", pmacros, pInclude, name, shaderver, ShaderFlagsVS, &pShader, &pError, NULL)); \
	if( FAILED(hr) ) \
	{ \
		DEBUG_LOG("Failed to load vs %s: \n%s\n", name, pError != NULL ? (const char*)pError->GetBufferPointer() : ""); \
		SAFE_RELEASE(pShader); \
		SAFE_RELEASE(pError); \
		return hr; \
	} \
	hr = pd3dDevice->CreateVertexShader((const DWORD*)pShader->GetBufferPointer(), &(ptr)); \
	SAFE_RELEASE(pShader); \
	SAFE_RELEASE(pError); \
	if( FAILED(hr) || (ptr) == NULL ) { \
		DEBUG_LOG("errors 0x%x for %s, failed.. try updating your drivers or dx\n", hr, name); \
		return E_FAIL; \
	} \
}
// Compiles entry point `name` of ps2hw.fx for profile `shmodel` and creates the
// pixel shader in `ptr`, releasing any shader `ptr` already held.
// The expanding function must provide `hr`, `pmacros`, `pInclude` and
// `ShaderFlagsPS`; it returns the failing HRESULT when compilation fails and
// E_FAIL when the shader object cannot be created.
// The compiler may fail without producing an error buffer (pError stays NULL),
// so the buffer is only read when present.
#define LOAD_PS(name, ptr, shmodel) { \
	LPD3DXBUFFER pShader = NULL, pError = NULL; \
	SAFE_RELEASE(ptr); \
	V(D3DXCompileShaderFromFile(EFFECT_NAME"ps2hw.fx", pmacros, pInclude, name, shmodel, ShaderFlagsPS, &pShader, &pError, NULL)); \
	if( FAILED(hr) ) \
	{ \
		DEBUG_LOG("Failed to load ps %s: \n%s\n", name, pError != NULL ? (const char*)pError->GetBufferPointer() : ""); \
		SAFE_RELEASE(pShader); \
		SAFE_RELEASE(pError); \
		return hr; \
	} \
	hr = pd3dDevice->CreatePixelShader((const DWORD*)pShader->GetBufferPointer(), &(ptr)); \
	SAFE_RELEASE(pShader); \
	SAFE_RELEASE(pError); \
	if( FAILED(hr) || (ptr) == NULL ) { \
		DEBUG_LOG("errors 0x%x for %s, failed.. try updating your drivers or dx\n", hr, name); \
		return E_FAIL; \
	} \
}
// Developer-build variant: there is no precompiled resource to index, so this
// only empties the texture-shader cache. Always returns S_OK.
HRESULT ZeroGS::LoadEffects()
{
	// clear the textures
	const int numcached = ArraySize(ppsTexture);
	for(int slot = 0; slot < numcached; ++slot) {
		SAFE_RELEASE(ppsTexture[slot]);
	}
	memset(ppsTexture, 0, sizeof(ppsTexture));

	return S_OK;
}
// Compile profile strings chosen from g_nPixelShaderVer: the 2_0 profiles for
// SHADER_20, the 3_0 profiles for every other value.
#define VS_VER (g_nPixelShaderVer == SHADER_20?"vs_2_0":"vs_3_0")
#define PS_VER (g_nPixelShaderVer == SHADER_20?"ps_2_0":"ps_3_0")
// Developer-build variant: compiles every fixed vertex and pixel shader from
// ps2hw.fx at run time. Returns the first failure reported by the LOAD_VS /
// LOAD_PS macros, S_OK when everything compiled.
// The locals hr, pmacros, pInclude, ShaderFlagsVS and ShaderFlagsPS are
// referenced by name from inside those macros.
HRESULT ZeroGS::LoadExtraEffects()
{
	HRESULT hr;
	DWORD ShaderFlagsPS = !DEBUG_PS2 ? 0 : (D3DXSHADER_DEBUG|D3DXSHADER_SKIPOPTIMIZATION);
	DWORD ShaderFlagsVS = !DEBUG_PS2 ? 0 : (D3DXSHADER_DEBUG|D3DXSHADER_SKIPOPTIMIZATION);

	ZeroGSShaderInclude includer;
	includer.context = 0;
	ZeroGSShaderInclude* pInclude = &includer;

	//assert( g_nPixelShaderVer == SHADER_30) ;
	// profile for the helper (bitblt/crtc/convert) pixel shaders
	const char* pszHelperProfile = g_nPixelShaderVer == SHADER_20 ? "ps_2_0" : "ps_2_a";
	const char* vsEntryPoints[4] = { "RegularVS", "TextureVS", "RegularFogVS", "TextureFogVS" };

	// WRITE_DEPTH=1 followed by the terminating empty entry
	D3DXMACRO depthmacros[2] = {0};
	depthmacros[0].Name = "WRITE_DEPTH";
	depthmacros[0].Definition = "1";
	D3DXMACRO* pmacros = NULL;

	// pvs layout: [2*i + context] without depth write, +8 with depth write
	for(int i = 0; i < 4; ++i) {
		for(int depth = 0; depth < 2; ++depth) {
			pmacros = depth ? depthmacros : NULL;
			for(int ctx = 0; ctx < 2; ++ctx) {
				includer.context = ctx;
				LOAD_VS(vsEntryPoints[i], pvs[2*i+8*depth+ctx], VS_VER);
			}
		}
	}

	includer.context = 0;

	// regular pixel shaders, first without and then with depth writing
	pmacros = NULL;
	LOAD_PS("RegularPS", ppsRegular[0], PS_VER);
	LOAD_PS("RegularFogPS", ppsRegular[1], PS_VER);
	pmacros = depthmacros;
	LOAD_PS("RegularPS", ppsRegular[2], PS_VER);
	LOAD_PS("RegularFogPS", ppsRegular[3], PS_VER);

	pmacros = NULL;
	LOAD_VS("BitBltVS", pvsBitBlt, "vs_2_0");

	LOAD_PS("BitBltPS", ppsBitBlt[0], pszHelperProfile);
	LOAD_PS("BitBltAAPS", ppsBitBlt[1], pszHelperProfile);
	LOAD_PS("BitBltDepthPS", ppsBitBltDepth[0], pszHelperProfile);
	LOAD_PS("BitBltDepthMRTPS", ppsBitBltDepth[1], pszHelperProfile);
	LOAD_PS("BitBltDepthTexPS", ppsBitBltDepthTex[0], pszHelperProfile);
	LOAD_PS("BitBltDepthTexMRTPS", ppsBitBltDepthTex[1], pszHelperProfile);

	LOAD_PS("CRTCTargPS", ppsCRTCTarg[0], pszHelperProfile);
	LOAD_PS("CRTCTargInterPS", ppsCRTCTarg[1], pszHelperProfile);
	LOAD_PS("CRTCPS", ppsCRTC[0], pszHelperProfile);
	LOAD_PS("CRTCInterPS", ppsCRTC[1], pszHelperProfile);
	LOAD_PS("CRTC24PS", ppsCRTC24[0], pszHelperProfile);
	LOAD_PS("CRTC24InterPS", ppsCRTC24[1], pszHelperProfile);

	LOAD_PS("ZeroPS", ppsOne, PS_VER);
	LOAD_PS("BaseTexturePS", ppsBaseTexture, pszHelperProfile);
	LOAD_PS("Convert16to32PS", ppsConvert16to32, pszHelperProfile);
	LOAD_PS("Convert32to16PS", ppsConvert32to16, pszHelperProfile);

	return S_OK;
}
// Developer-build variant: returns the texture pixel shader for the given
// state, compiling it from ps2hw.fx through LoadShaderFromType on first use and
// caching it in ppsTexture. Returns NULL when the shader cannot be created.
LPD3DPS ZeroGS::LoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context)
{
	assert( texfilter < NUM_FILTERS );
	//assert( g_nPixelShaderVer == SHADER_30 );

	// fold the two wrap modes into one TEXWRAP_* value
	int texwrap;
	if( clamp.wms != clamp.wmt ) {
		if( clamp.wms == 3 || clamp.wmt == 3 )
			texwrap = TEXWRAP_REGION_REPEAT;
		else
			texwrap = TEXWRAP_REPEAT_CLAMP;
	}
	else if( clamp.wms == 0 )
		texwrap = TEXWRAP_REPEAT;
	else if( clamp.wms == 1 || clamp.wms == 2 )
		texwrap = TEXWRAP_CLAMP;
	else
		texwrap = TEXWRAP_REGION_REPEAT;

	const int slot = GET_SHADER_INDEX(type, texfilter, texwrap, fog, s_bWriteDepth, testaem, exactcolor, context, 0);
	LPD3DPS* ppCached = ppsTexture+slot;

	if( *ppCached == NULL ) {
		ZeroGSShaderInclude includer;
		includer.context = context;

		HRESULT hr = LoadShaderFromType(EFFECT_NAME"ps2hw.fx", type, texfilter, texwrap, fog, s_bWriteDepth, testaem, exactcolor, g_nPixelShaderVer, 0, pd3dDevice, &includer, ppCached);
		if( !SUCCEEDED(hr) ) {
			DEBUG_LOG("Failed to create shader %d,%d,%d,%d\n", type, fog, texfilter, 4*clamp.wms+clamp.wmt);
			return NULL;
		}
	}

	return *ppCached;
}
#endif // RELEASE_TO_PUBLIC
// (Re)creates the objects that DeleteDeviceObjects() releases: the sprite, the
// per-context vertex buffers, the block lookup textures, the fullscreen rect,
// the 16<->32 bit conversion textures, plus the default sampler and render
// states. Returns the failing HRESULT of any V_RETURN'd call, S_OK otherwise.
// NOTE(review): none of the Lock/LockRect/LockBox return values are checked.
HRESULT ZeroGS::InitDeviceObjects()
{
//g_GameSettings |= 0;//GAME_VSSHACK|GAME_FULL16BITRES|GAME_NODEPTHRESOLVE|GAME_FASTUPDATE;
//s_bWriteDepth = TRUE;
// start from a clean slate
DeleteDeviceObjects();
int i;
HRESULT hr;
SETRS(D3DRS_SRCBLEND, D3DBLEND_ONE);
SETRS(D3DRS_DESTBLEND, D3DBLEND_ONE);
if( pFont ) V_RETURN( pFont->OnResetDevice() );
V_RETURN( D3DXCreateSprite( pd3dDevice, &pSprite ) );
// a failure to load the logo is not treated as fatal (V, not V_RETURN)
// NOTE(review): ptexLogo is not released by DeleteDeviceObjects() - confirm it is released elsewhere
V(D3DXCreateTextureFromResource(pd3dDevice, hInst, MAKEINTRESOURCE( IDB_ZEROGSLOGO ), &ptexLogo));
// one dynamic vertex buffer of POINT_BUFFERSIZE vertices per context
for(i = 0; i < 2; ++i)
{
V_RETURN(pd3dDevice->CreateVertexBuffer( sizeof(VertexGPU) * POINT_BUFFERSIZE, D3DUSAGE_DYNAMIC|D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &vb[i].pvb, NULL));
}
// create the blocks texture
// D3DFMT_R32F is tried first; D3DFMT_G16R16 is the fallback and needs g_fBlockMult rescaled
D3DFORMAT blockfmt = D3DFMT_R32F;
g_fBlockMult = 1;
if( FAILED(hr = pd3dDevice->CreateTexture(BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 1, 0, blockfmt, D3DPOOL_MANAGED, &ptexBlocks, NULL)) ) {
blockfmt = D3DFMT_G16R16;
g_fBlockMult = 65535.0f*(float)g_fiGPU_TEXWIDTH;
V_RETURN(pd3dDevice->CreateTexture(BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 1, 0, blockfmt, D3DPOOL_MANAGED, &ptexBlocks, NULL));
}
// the bilinear block texture is only attempted together with the R32F format;
// failing to create it is only logged
if( blockfmt == D3DFMT_R32F ) {
if( FAILED(hr = pd3dDevice->CreateTexture(BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 1, 0, D3DFMT_A32B32G32R32F, D3DPOOL_MANAGED, &ptexBilinearBlocks, NULL)) ) {
DEBUG_LOG("Failed to create bilinear block texture, fmt = D3DFMT_A32B32G32R32F\n");
}
}
else ptexBilinearBlocks = NULL;
// fill a simple rect
// four corner vertices at +-0x7fff with s/t running from 0 to 1
V_RETURN(pd3dDevice->CreateVertexBuffer( 4 * sizeof(VertexGPU), D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &pvbRect, NULL));
VertexGPU* pvert;
pvbRect->Lock(0, 0, (void**)&pvert, 0);
pvert->x = -0x7fff; pvert->y = 0x7fff; pvert->z = 0; pvert->s = 0; pvert->t = 0; pvert++;
pvert->x = 0x7fff; pvert->y = 0x7fff; pvert->z = 0; pvert->s = 1; pvert->t = 0; pvert++;
pvert->x = -0x7fff; pvert->y = -0x7fff; pvert->z = 0; pvert->s = 0; pvert->t = 1; pvert++;
pvert->x = 0x7fff; pvert->y = -0x7fff; pvert->z = 0; pvert->s = 1; pvert->t = 1; pvert++;
pvbRect->Unlock();
// let BLOCK::FillBlocks write the block tables; the bilinear lock is passed only when that texture exists
D3DLOCKED_RECT lock, lockbilinear;
ptexBlocks->LockRect(0, &lock, NULL, 0);
if( ptexBilinearBlocks != NULL )
ptexBilinearBlocks->LockRect(0, &lockbilinear, NULL, 0);
BLOCK::FillBlocks(&lock, ptexBilinearBlocks != NULL ? &lockbilinear : NULL, blockfmt);
ptexBlocks->UnlockRect(0);
if( ptexBilinearBlocks != NULL )
ptexBilinearBlocks->UnlockRect(0);
// create the conversion textures
// 256x256 table: texel i holds RGBA16to32(i) with the red and blue bytes swapped
V_RETURN(pd3dDevice->CreateTexture(256, 256, 1, 0, D3DFMT_A8R8G8B8, D3DPOOL_MANAGED, &ptexConv16to32, NULL));
ptexConv16to32->LockRect(0, &lock, NULL, 0);
assert(lock.Pitch == 256*4);
u32* dst = (u32*)lock.pBits;
for(i = 0; i < 256*256; ++i) {
DWORD tempcol = RGBA16to32(i);
// have to flip r and b
*dst++ = (tempcol&0xff00ff00)|((tempcol&0xff)<<16)|((tempcol&0xff0000)>>16);
}
ptexConv16to32->UnlockRect(0);
// 32x32x32 volume: texel (k,j,i) packs (i<<10)|(j<<5)|k, low byte into bits 16-23, high byte into bits 8-15
// NOTE(review): texels are written back to back through pBits without consulting RowPitch/SlicePitch
V_RETURN(pd3dDevice->CreateVolumeTexture(32, 32, 32, 1, 0, D3DFMT_A8R8G8B8, D3DPOOL_MANAGED, &ptexConv32to16, NULL));
D3DLOCKED_BOX lockbox;
ptexConv32to16->LockBox(0, &lockbox, NULL, 0);
dst = (u32*)lockbox.pBits;
for(i = 0; i < 32; ++i) {
for(int j = 0; j < 32; ++j) {
for(int k = 0; k < 32; ++k) {
u32 col = (i<<10)|(j<<5)|k;
*dst++ = ((col&0xff)<<16)|(col&0xff00);
}
}
}
ptexConv32to16->UnlockBox(0);
// set samplers
// default for the first eight samplers: point filtering, clamped addressing
for(i = 0; i < 8; ++i) {
pd3dDevice->SetSamplerState(i, D3DSAMP_MINFILTER, D3DTEXF_POINT);
pd3dDevice->SetSamplerState(i, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
pd3dDevice->SetSamplerState(i, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
pd3dDevice->SetSamplerState(i, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
}
//pd3dDevice->SetSamplerState(SAMP_SRC, D3DSAMP_ADDRESSU, D3DTADDRESS_WRAP);
//pd3dDevice->SetSamplerState(SAMP_SRC, D3DSAMP_ADDRESSV, D3DTADDRESS_WRAP);
// the block and bitwise-and lookup samplers wrap instead of clamping
pd3dDevice->SetSamplerState(SAMP_BLOCKS, D3DSAMP_ADDRESSU, D3DTADDRESS_WRAP);
pd3dDevice->SetSamplerState(SAMP_BLOCKS, D3DSAMP_ADDRESSV, D3DTADDRESS_WRAP);
pd3dDevice->SetSamplerState(SAMP_BILINEARBLOCKS, D3DSAMP_ADDRESSU, D3DTADDRESS_WRAP);
pd3dDevice->SetSamplerState(SAMP_BILINEARBLOCKS, D3DSAMP_ADDRESSV, D3DTADDRESS_WRAP);
pd3dDevice->SetSamplerState(SAMP_BILINEARBLOCKS, D3DSAMP_ADDRESSW, D3DTADDRESS_CLAMP); // can be used as a 3d texture
pd3dDevice->SetSamplerState(SAMP_BITWISEANDX, D3DSAMP_ADDRESSU, D3DTADDRESS_WRAP);
pd3dDevice->SetSamplerState(SAMP_BITWISEANDX, D3DSAMP_ADDRESSV, D3DTADDRESS_WRAP);
pd3dDevice->SetSamplerState(SAMP_BITWISEANDY, D3DSAMP_ADDRESSU, D3DTADDRESS_WRAP);
pd3dDevice->SetSamplerState(SAMP_BITWISEANDY, D3DSAMP_ADDRESSV, D3DTADDRESS_WRAP);
// the final-output sampler is the only one using linear filtering
pd3dDevice->SetSamplerState(SAMP_FINAL, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
pd3dDevice->SetSamplerState(SAMP_FINAL, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
pd3dDevice->SetTexture(SAMP_BLOCKS, ptexBlocks);
pd3dDevice->SetTexture(SAMP_BILINEARBLOCKS, ptexBilinearBlocks);
pd3dDevice->SetVertexDeclaration(pdecl);
// default render states
SETRS(D3DRS_STENCILENABLE, FALSE);
SETRS(D3DRS_SCISSORTESTENABLE, 1);
SETRS(D3DRS_SEPARATEALPHABLENDENABLE, USEALPHABLENDING);
SETRS(D3DRS_SRCBLENDALPHA, D3DBLEND_ONE);
SETRS(D3DRS_DESTBLENDALPHA, D3DBLEND_ZERO);
SETRS(D3DRS_CULLMODE, D3DCULL_NONE);
SETRS(D3DRS_BLENDFACTOR, 0x80000000);
SETRS(D3DRS_COLORWRITEENABLE1, 0);
// points
SETRS(D3DRS_POINTSCALEENABLE, FALSE);
SETRS(D3DRS_POINTSIZE, FtoDW(1.0f));
g_nDepthBias = 0;
SETRS(D3DRS_DEPTHBIAS, FtoDW(0.000015f));
// upload the depth constants
SETCONSTF(GPU_Z, g_vdepth);//vb[icurctx].zbuf.psm&3]);
s_vznorm = DXVEC4(g_filog32, 0, 0,0);
SETCONSTF(GPU_ZNORM, s_vznorm);
return S_OK;
}
// Releases everything InitDeviceObjects() creates, plus the cached targets and
// the AVI capture state. A running AVI capture is stopped first.
void ZeroGS::DeleteDeviceObjects()
{
	// finish a running capture before its texture goes away
	if( s_aviinit ) {
		StopCapture();
		STOP_AVI();
		DEBUG_LOG("zerogs.avi stopped");
		s_aviinit = 0;
	}
	SAFE_RELEASE(s_ptexAVICapture);

	if( pFont )
		pFont->OnLostDevice();
	SAFE_RELEASE(pSprite);

	// cached targets and textures
	g_MemTargs.Destroy();
	s_RTs.Destroy();
	s_DepthRTs.Destroy();
	s_BitwiseTextures.Destroy();

	// lookup / helper resources
	SAFE_RELEASE(s_ptexInterlace);
	SAFE_RELEASE(pvbRect);
	SAFE_RELEASE(ptexBlocks);
	SAFE_RELEASE(ptexBilinearBlocks);
	SAFE_RELEASE(ptexConv16to32);
	SAFE_RELEASE(ptexConv32to16);

	s_bAlphaSet = FALSE;

	// each context's vertex buffer is unlocked before it is released
	for(int ctx = 0; ctx < 2; ++ctx) {
		vb[ctx].Unlock();
		SAFE_RELEASE(vb[ctx].pvb);
	}
}
// Applies the current prim register to the vertex buffer of its context.
// Pending geometry is flushed first when CheckPrim() reports that it is
// needed, and the buffer is locked again afterwards. Does nothing while the
// device is lost.
void ZeroGS::Prim()
{
	if( !g_bIsLost ) {
		VB& curvb = vb[prim->ctxt];

		if( curvb.CheckPrim() )
			Flush(prim->ctxt);

		curvb.curprim._val = prim->_val;

		// (disabled in the original: flushing the other context when both
		// share the same frame buffer pointer)

		curvb.curprim.prim = prim->prim;
		curvb.Lock();
	}
}
// Decides whether a texture is sampled with filtering (1) or not (0) from the
// bilinear configuration and the mmin/mmag fields of tex1.
int GetTexFilter(const tex1Info& tex1)
{
	// always force
	if( conf.bilinear == 2 )
		return 1;

	// filtering disabled, or the bilinear block texture is unavailable
	if( !conf.bilinear || ptexBilinearBlocks == NULL )
		return 0;

	int mode;
	if( tex1.mmin <= 1 )
		mode = tex1.mmin|tex1.mmag;
	else if( tex1.mmag )
		mode = (tex1.mmin+2)&5;
	else
		mode = tex1.mmin;

	switch( mode ) {
		case 1:
		case 4:
		case 5:
			return 1;
		default:
			return 0;
	}
}
// Developer builds only: discards every cached texture shader and recompiles
// the fixed shaders. Compiles to an empty function under RELEASE_TO_PUBLIC.
// The result of LoadExtraEffects() is ignored.
void ZeroGS::ReloadEffects()
{
#ifndef RELEASE_TO_PUBLIC
	int slot = 0;
	while( slot < ArraySize(ppsTexture) ) {
		SAFE_RELEASE(ppsTexture[slot]);
		++slot;
	}
	memset(ppsTexture, 0, sizeof(ppsTexture));

	LoadExtraEffects();
#endif
}
// Counts target resolves done by Flush() for CLUT textures; Flush() stops
// resolving once the count exceeds 1.
static int s_ClutResolve = 0;
// Set by Flush() to g_SaveFrameNum after it resolves a target for a PSMT8
// texture; Flush() only resolves while this is 0.
static int s_PSM8Resolve = 0;
void ZeroGS::Flush(int context)
{
assert( context >= 0 && context <= 1 );
#ifndef RELEASE_TO_PUBLIC
if( g_bUpdateEffect ) {
ReloadEffects();
g_bUpdateEffect = 0;
}
#endif
VB& curvb = vb[context];
const pixTest curtest = curvb.test;
if( curvb.dwCount == 0 || (curtest.zte && curtest.ztst == 0) || g_bIsLost ) {
curvb.dwCount = 0;
return;
}
if( s_RangeMngr.ranges.size() > 0 ) {
// don't want infinite loop
DWORD prevcount = curvb.dwCount;
curvb.dwCount = 0;
FlushTransferRanges(curvb.curprim.tme ? &curvb.tex0 : NULL);
curvb.dwCount = prevcount;
//if( curvb.dwCount == 0 )
// return;
}
if( curvb.bNeedTexCheck ) {
curvb.FlushTexData();
if( curvb.dwCount == 0 )
return;
}
if( !s_bBeginScene ) {
pd3dDevice->BeginScene();
s_bBeginScene = TRUE;
}
curvb.Unlock();
LPD3DTEX ptexRenderTargetCached = NULL;
int cachedtbp0, cachedtbw, cachedtbh;
//s_bWriteDepth = TRUE;
//static int lasttime = 0;
//fprintf(gsLog, "%d: %d\n", g_SaveFrameNum, timeGetTime()-lasttime);
//lasttime = timeGetTime();
if( curvb.bNeedFrameCheck || curvb.bNeedZCheck ) {
int tpsm = curvb.tex0.psm;
if( curvb.bNeedTexCheck )
tpsm = (curvb.uNextTex0Data[0] >> 20) & 0x3f;
if( tpsm == PSMT8H && (g_GameSettings&GAME_NOTARGETCLUT) ) {
curvb.dwCount = 0;
return;
}
// check for the texture before checking the frame (since things could get destroyed)
if( (g_GameSettings&GAME_PARTIALPOINTERS) &&curvb.curprim.tme ) {
// if( (curvb.gsfb.fbp&0xff) != 0 ) {
// curvb.dwCount = 0;
// return;
// }
// if texture is part of a previous target, use that instead
int tbw = curvb.tex0.tbw;
int tbp0 = curvb.tex0.tbp0;
if( curvb.bNeedTexCheck ) {
// not yet initied, but still need to get correct target! (xeno3 ingame)
tbp0 = (curvb.uNextTex0Data[0] & 0x3fff);
tbw = ((curvb.uNextTex0Data[0] >> 14) & 0x3f) * 64;
}
if( (tpsm&~1) == 0 ) {
CRenderTarget* ptemptarg = s_RTs.GetTarg(tbp0, tbw);
if( ptemptarg != NULL && (ptemptarg->psm&~1) == (tpsm&~1) ) {
ptexRenderTargetCached = ptemptarg->ptex;
ptexRenderTargetCached->AddRef();
cachedtbp0 = ptemptarg->fbp;
cachedtbw = ptemptarg->fbw;
cachedtbh = ptemptarg->fbh;
}
}
}
curvb.CheckFrame(curvb.curprim.tme ? curvb.tex0.tbp0 : 0);
}
// if( g_SaveFrameNum == 976 ) {
// curvb.prndr->ConvertTo32();
// }
if( curvb.prndr == NULL || curvb.pdepth == NULL ) {
WARN_LOG("Current render target NULL (ctx: %d)", context);
curvb.dwCount = 0;
SAFE_RELEASE(ptexRenderTargetCached);
return;
}
#if defined(PRIM_LOG) && defined(PCSX2_DEBUG)
static const char* patst[8] = { "NEVER", "ALWAYS", "LESS", "LEQUAL", "EQUAL", "GEQUAL", "GREATER", "NOTEQUAL"};
static const char* pztst[4] = { "NEVER", "ALWAYS", "GEQUAL", "GREATER" };
static const char* pafail[4] = { "KEEP", "FB_ONLY", "ZB_ONLY", "RGB_ONLY" };
PRIM_LOG("**Drawing ctx %d, num %d, fbp: 0x%x, zbp: 0x%x, fpsm: %d, zpsm: %d, fbw: %d\n", context, vb[context].dwCount, curvb.prndr->fbp, curvb.zbuf.zbp, curvb.prndr->psm, curvb.zbuf.psm, curvb.prndr->fbw);
PRIM_LOG("prim: prim=%x iip=%x tme=%x fge=%x abe=%x aa1=%x fst=%x ctxt=%x fix=%x\n",
curvb.curprim.prim, curvb.curprim.iip, curvb.curprim.tme, curvb.curprim.fge, curvb.curprim.abe, curvb.curprim.aa1, curvb.curprim.fst, curvb.curprim.ctxt, curvb.curprim.fix);
PRIM_LOG("test: ate:%d, atst: %s, aref: %d, afail: %s, date: %d, datm: %d, zte: %d, ztst: %s, fba: %d\n",
curvb.test.ate, patst[curvb.test.atst], curvb.test.aref, pafail[curvb.test.afail], curvb.test.date, curvb.test.datm, curvb.test.zte, pztst[curvb.test.ztst], curvb.fba.fba);
PRIM_LOG("alpha: A%d B%d C%d D%d FIX:%d pabe: %d; aem: %d, ta0: %d, ta1: %d\n", curvb.alpha.a, curvb.alpha.b, curvb.alpha.c, curvb.alpha.d, curvb.alpha.fix, gs.pabe, gs.texa.aem, gs.texa.ta[0], gs.texa.ta[1]);
PRIM_LOG("tex0: tbp0=0x%x, tbw=%d, psm=0x%x, tw=%d, th=%d, tcc=%d, tfx=%d, cbp=0x%x, cpsm=0x%x, csm=%d, csa=%d, cld=%d\n",
curvb.tex0.tbp0, curvb.tex0.tbw, curvb.tex0.psm, curvb.tex0.tw,
curvb.tex0.th, curvb.tex0.tcc, curvb.tex0.tfx, curvb.tex0.cbp,
curvb.tex0.cpsm, curvb.tex0.csm, curvb.tex0.csa, curvb.tex0.cld);
PRIM_LOG("frame: %d\n\n", g_SaveFrameNum);
#endif
CMemoryTarget* pmemtarg = NULL;
CRenderTarget* ptextarg = NULL;
// kh2 hack
// if( curvb.dwCount == 2 && curvb.curprim.tme == 0 && curvb.curprim.abe == 0 && (curvb.tex0.tbp0 == 0x2a00 || curvb.tex0.tbp0==0x1d00) ) {
// // skip
// DEBUG_LOG("skipping\n");
// g_SaveFrameNum++;
// curvb.dwCount = 0;
// return;
// }
if( curtest.date || gs.pabe )
SetDestAlphaTest();
// set the correct pixel shaders
if( curvb.curprim.tme && ptexRenderTargetCached == NULL ) {
// if texture is part of a previous target, use that instead
int tbw = curvb.tex0.tbw;
int tbp0 = curvb.tex0.tbp0;
int tpsm = curvb.tex0.psm;
if( curvb.bNeedTexCheck ) {
// not yet initied, but still need to get correct target! (xeno3 ingame)
tbp0 = (curvb.uNextTex0Data[0] & 0x3fff);
tbw = ((curvb.uNextTex0Data[0] >> 14) & 0x3f) * 64;
tpsm = (curvb.uNextTex0Data[0] >> 20) & 0x3f;
}
ptextarg = s_RTs.GetTarg(tbp0, tbw);
if( ptextarg == NULL && tpsm == PSMT8 ) {
// check for targets with half the width
ptextarg = s_RTs.GetTarg(tbp0, tbw/2);
if( ptextarg == NULL ) {
tbp0 &= ~0x7ff;
ptextarg = s_RTs.GetTarg(tbp0, tbw/2); // mgs3 hack
if( ptextarg == NULL ) {
// check the next level (mgs3)
tbp0 &= ~0xfff;
ptextarg = s_RTs.GetTarg(tbp0, tbw/2); // mgs3 hack
}
if( ptextarg != NULL && ptextarg->start > tbp0*256 ) {
// target beyond range, so ignore
ptextarg = NULL;
}
}
// if( ptextarg != NULL ) {
// // make sure target isn't invalidated by the ranges
// for(vector<CRangeManager::RANGE>::iterator itrange = s_RangeMngr.ranges.begin(); itrange != s_RangeMngr.ranges.end(); ++itrange ) {
//
// int start = itrange->start;
// int end = itrange->end;
//
// // if start and end are in the range or there's a range that is between tbp0 and start, then remove
// if( (start <= tbp0*256 && end > tbp0*256) || (start >= ptextarg->fbp*256 && start <= tbp0*256) ) {
// ptextarg = NULL;
// break;
// }
// }
// }
if( ptextarg != NULL && !(ptextarg->status&CRenderTarget::TS_NeedUpdate) ) {
// find the equivalent memtarg
if( s_PSM8Resolve == 0 ) { //|| (s_PSM8Resolve > 0 && s_PSM8Resolve+128 < g_SaveFrameNum) ) {
DWORD prevcount = curvb.dwCount;
curvb.dwCount = 0;
if( ptextarg->pmimicparent != NULL )
ptextarg->pmimicparent->Resolve();
else
ptextarg->Resolve();
curvb.dwCount = prevcount;
s_PSM8Resolve = g_SaveFrameNum; // stop from resolving again (once per frame)
}
tex0Info mytex0 = curvb.tex0;
mytex0.tbp0 = tbp0;
if( ptextarg->pmimicparent != NULL ) {
mytex0.tbp0 = ptextarg->pmimicparent->fbp;
}
pmemtarg = g_MemTargs.GetMemoryTarget(mytex0, 1);
// have to add an offset to all texture reads
mytex0.tbp0 = tbp0; // change so that SetTexVariablesInt can set the right offsets
SetTexVariablesInt(context, GetTexFilter(curvb.tex1), mytex0, pmemtarg, s_bForceTexFlush);
curvb.bVarsTexSync = TRUE;
ptextarg = NULL; // won't be needing this anymore
}
}
if( (tpsm&0x30)==0x30 && ptextarg == NULL ) {
// try depth
ptextarg = s_DepthRTs.GetTarg(tbp0, tbw);
}
if( ptextarg == NULL && (g_GameSettings&GAME_TEXTURETARGS) ) {
// check if any part of the texture intersects the current target
if( !PSMT_ISCLUT(tpsm) && curvb.tex0.tbp0 >= curvb.frame.fbp && (curvb.tex0.tbp0 << 8) < curvb.prndr->end) {
ptextarg = curvb.prndr;
}
}
if( ptextarg != NULL && !(ptextarg->status&CRenderTarget::TS_NeedUpdate) ) {
if( PSMT_ISCLUT(tpsm) && tpsm != PSMT8H && tpsm != PSMT8 ) { // handle 8h cluts
// don't support clut targets, read from mem
// 4hl - kh2 check
if( tpsm != PSMT4HL && tpsm != PSMT4HH && s_ClutResolve <= 1 ) { // xenosaga requires 2 resolves
DWORD prevcount = curvb.dwCount;
curvb.dwCount = 0;
ptextarg->Resolve();
s_ClutResolve++;
curvb.dwCount = prevcount;
}
ptextarg = NULL;
}
else {
if( ptextarg == curvb.prndr ) {
// need feedback
if( ptextarg->pmimicparent != NULL ) {
// if the target is mimic, create the feedback of the parent
assert( ptextarg->pmimicparent->ptex == ptextarg->ptex || ptextarg->pmimicparent->ptexFeedback == ptextarg->ptex );
SAFE_RELEASE(ptextarg->ptexFeedback);
SAFE_RELEASE(ptextarg->psurfFeedback);
ptextarg->pmimicparent->CreateFeedback();
ptextarg->ptex = ptextarg->pmimicparent->ptex;
ptextarg->ptexFeedback = ptextarg->pmimicparent->ptexFeedback; ptextarg->ptexFeedback->AddRef();
ptextarg->psurf = ptextarg->pmimicparent->psurf;
ptextarg->psurfFeedback = ptextarg->pmimicparent->psurfFeedback; ptextarg->psurfFeedback->AddRef();
}
else
curvb.prndr->CreateFeedback();
pd3dDevice->SetRenderTarget(1, (s_bWriteDepth && curvb.pdepth != NULL) ? curvb.pdepth->psurf : NULL);
}
}
}
else ptextarg = NULL;
}
#ifdef PCSX2_DEBUG
if( g_bSaveFlushedFrame & 0x80000000 ) {
char str[255];
sprintf(str, "rndr.tga", g_SaveFrameNum);
D3DXSaveSurfaceToFile(str, D3DXIFF_TGA, curvb.prndr->psurf, NULL, NULL);
}
#endif
if( conf.options & GSOPTION_WIREFRAME ) {
// always render first few geometry as solid
if( s_nWireframeCount > 0 ) {
SETRS(D3DRS_FILLMODE, D3DFILL_SOLID);
}
}
if( !curvb.bVarsSetTarg )
SetContextTarget(context);
else {
assert( curvb.pdepth != NULL );
if( curvb.pdepth->status & CRenderTarget::TS_Virtual) {
if( !curvb.zbuf.zmsk ) {
CRenderTarget* ptemp = s_DepthRTs.Promote(curvb.pdepth->fbp|(curvb.pdepth->fbw<<16));
assert( ptemp == curvb.pdepth );
}
else
curvb.pdepth->status &= ~CRenderTarget::TS_NeedUpdate;
}
if( (curvb.pdepth->status & CRenderTarget::TS_NeedUpdate) || (curvb.prndr->status & CRenderTarget::TS_NeedUpdate) )
SetContextTarget(context);
}
SetTexVariables(context);
if( ptextarg == NULL && pmemtarg == NULL ) {
pmemtarg = g_MemTargs.GetMemoryTarget(curvb.tex0, 1);
if( vb[context].bVarsTexSync ) {
if( vb[context].pmemtarg != pmemtarg ) {
SetTexVariablesInt(context, GetTexFilter(curvb.tex1), curvb.tex0, pmemtarg, s_bForceTexFlush);
vb[context].bVarsTexSync = TRUE;
}
}
else {
SetTexVariablesInt(context, GetTexFilter(curvb.tex1), curvb.tex0, pmemtarg, s_bForceTexFlush);
vb[context].bVarsTexSync = TRUE;
INC_TEXVARS();
}
}
icurctx = context;
assert( !(curvb.prndr->status&CRenderTarget::TS_NeedUpdate) );
curvb.prndr->status = 0;
if( curvb.pdepth != NULL ) {
assert( !(curvb.pdepth->status&CRenderTarget::TS_NeedUpdate) );
if( !curvb.zbuf.zmsk ) {
assert( !(curvb.pdepth->status & CRenderTarget::TS_Virtual) );
curvb.pdepth->status = 0;
}
}
s_dwColorWrite = (curvb.prndr->psm&0xf) == 1 ? (D3DCOLORWRITEENABLE_BLUE|D3DCOLORWRITEENABLE_GREEN|D3DCOLORWRITEENABLE_RED) : 0xf;
if( ((curvb.frame.fbm)&0xff) == 0xff) s_dwColorWrite &= ~D3DCOLORWRITEENABLE_RED;
if( ((curvb.frame.fbm>>8)&0xff) == 0xff) s_dwColorWrite &= ~D3DCOLORWRITEENABLE_GREEN;
if( ((curvb.frame.fbm>>16)&0xff) == 0xff) s_dwColorWrite &= ~D3DCOLORWRITEENABLE_BLUE;
if( ((curvb.frame.fbm>>24)&0xff) == 0xff) s_dwColorWrite &= ~D3DCOLORWRITEENABLE_ALPHA;
SETRS(D3DRS_COLORWRITEENABLE, s_dwColorWrite);
pd3dDevice->SetScissorRect(&curvb.prndr->scissorrect); // need to always set it since something in this code resets it
// set the shaders
pd3dDevice->SetVertexShader(pvs[2*((curvb.curprim._val>>1)&3)+8*s_bWriteDepth+context]);
pd3dDevice->SetStreamSource(0, curvb.pvb, curvb.dwCurOff*sizeof(VertexGPU), sizeof(VertexGPU));
DWORD dwUsingSpecialTesting = 0;
DWORD dwFilterOpts = 0;
IDirect3DPixelShader9* pps;
// need exact if equal or notequal
int exactcolor = 0;
if( g_nPixelShaderVer != SHADER_20 )
// ffx2 breaks when ==7
exactcolor = (curtest.ate && curtest.aref <= 128) && (curtest.atst==4);//||curtest.atst==7);
int shadertype = 0;
// set the correct pixel shaders
if( curvb.curprim.tme ) {
if( curvb.ptexClamp[0] != NULL ) pd3dDevice->SetTexture(SAMP_BITWISEANDX, curvb.ptexClamp[0]);
if( curvb.ptexClamp[1] != NULL ) pd3dDevice->SetTexture(SAMP_BITWISEANDY, curvb.ptexClamp[1]);
if( ptexRenderTargetCached != NULL ) {
DXVEC4 vpageoffset;
vpageoffset.w = 0;
int psm = curvb.tex0.psm;
assert( !PSMT_ISCLUT(curvb.tex0.psm));
pps = LoadShadeEffect(1, 0, curvb.curprim.fge, curvb.tex0.tcc && gs.texa.aem && (psm == PSMCT24 || psm == PSMCT16 || psm == PSMCT16S),
exactcolor, curvb.clamp, context);
pd3dDevice->SetTexture(SAMP_MEMORY0+context, ptexRenderTargetCached);
s_ptexCurSet[context] = ptexRenderTargetCached;
if( curvb.tex1.mmag ) {
pd3dDevice->SetSamplerState(SAMP_MEMORY0+context, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
dwFilterOpts |= 1;
}
if( curvb.tex1.mmin ) {
pd3dDevice->SetSamplerState(SAMP_MEMORY0+context, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
dwFilterOpts |= 2;
}
DXVEC4 vTexDims;
vTexDims.x = curvb.tex0.tw / (float)cachedtbw;
vTexDims.y = curvb.tex0.th / (float)cachedtbh;
// u32 tbp0 = curvb.tex0.tbp0 >> 5; // align to a page
// int blockheight = 32;
// int ycoord = ((curvb.tex0.tbp0-cachedtbp0)/(32*(cachedtbw>>6))) * blockheight;
// int xcoord = (((curvb.tex0.tbp0-cachedtbp0)%(32*(cachedtbw>>6)))) * 2;
//// xcoord += ptextarg->targoffx;
//// ycoord += ptextarg->targoffy;
// vTexDims.z = (float)xcoord / (float)cachedtbw;
// vTexDims.w = (float)ycoord / (float)cachedtbh;
vTexDims.z = vTexDims.w = 0;
SETCONSTF(GPU_TEXDIMS0+context, vTexDims);
SETCONSTF(GPU_PAGEOFFSET0+context, vpageoffset);
if( g_bSaveTex )
D3DXSaveTextureToFile("tex.tga", D3DXIFF_TGA, ptexRenderTargetCached, NULL);
}
else if( ptextarg != NULL ) {
if( ptextarg->IsDepth() )
SetWriteDepth();
DXVEC4 vpageoffset;
vpageoffset.w = 0;
shadertype = 1;
if( (curvb.tex0.psm == PSMT8 || curvb.tex0.psm == PSMT8H) && !(g_GameSettings&GAME_NOTARGETCLUT) ) {
// load the clut to memory
LPD3DTEX ptexclut = NULL;
pd3dDevice->CreateTexture(256, 1, 1, 0, (curvb.tex0.cpsm&2) ? D3DFMT_A1R5G5B5 : D3DFMT_A8R8G8B8, D3DPOOL_MANAGED, &ptexclut, NULL);
if( ptexclut != NULL ) {
D3DLOCKED_RECT lock;
ptexclut->LockRect(0, &lock, NULL, D3DLOCK_NOSYSLOCK);
// fill the buffer by decoding the clut
int nClutOffset = 0, clutsize;
int entries = (curvb.tex0.psm&3)==3 ? 256 : 16;
if( curvb.tex0.cpsm <= 1 ) { // 32 bit
nClutOffset = 64 * curvb.tex0.csa;
clutsize = min(entries, 256-curvb.tex0.csa*16)*4;
}
else {
nClutOffset = 64 * (curvb.tex0.csa&15) + (curvb.tex0.csa>=16?2:0);
clutsize = min(entries, 512-curvb.tex0.csa*16)*2;
}
if( curvb.tex0.cpsm <= 1 ) { // 32 bit
memcpy_amd(lock.pBits, ZeroGS::g_pbyGSClut+nClutOffset, clutsize);
}
else {
u16* pClutBuffer = (u16*)(ZeroGS::g_pbyGSClut + nClutOffset);
u16* pclut = (u16*)lock.pBits;
int left = ((u32)nClutOffset & 2) ? 0 : ((nClutOffset&0x3ff)/2)+clutsize-512;
if( left > 0 ) clutsize -= left;
while(clutsize > 0) {
pclut[0] = pClutBuffer[0];
pclut++;
pClutBuffer+=2;
clutsize -= 2;
}
if( left > 0) {
pClutBuffer = (u16*)(ZeroGS::g_pbyGSClut + 2);
while(left > 0) {
pclut[0] = pClutBuffer[0];
left -= 2;
pClutBuffer += 2;
pclut++;
}
}
}
ptexclut->UnlockRect(0);
s_vecTempTextures.push_back(ptexclut);
pd3dDevice->SetTexture(SAMP_FINAL, ptexclut);
if( g_bSaveTex )
D3DXSaveTextureToFile("clut.tga", D3DXIFF_TGA, ptexclut, NULL);
}
if( g_nPixelShaderVer != SHADER_20 && (ptextarg->psm & 2) ) {
// 16 bit texture
shadertype = 4;
DXVEC4 v;
v.x = 16.0f / (float)ptextarg->fbw;
v.y = 64.0f / (float)ptextarg->fbh;
v.z = 0.5f * v.x;
v.w = 0.5f * v.y;
SETCONSTF(GPU_TEXOFFSET0, v);
v.x = 1;
v.y = -0.5f;
v.z = 0;
v.w = 0.0001f;
SETCONSTF(GPU_PAGEOFFSET0, v);
pd3dDevice->SetSamplerState(SAMP_BILINEARBLOCKS, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
pd3dDevice->SetSamplerState(SAMP_BILINEARBLOCKS, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
pd3dDevice->SetTexture(SAMP_BILINEARBLOCKS, ptexConv32to16);
}
else
shadertype = 2;
}
else {
if( PSMT_ISCLUT(curvb.tex0.psm) )
WARN_LOG("Using render target with CLUTs %d!\n", curvb.tex0.psm);
else {
if( (curvb.tex0.psm&2) != (ptextarg->psm&2) && (g_nPixelShaderVer != SHADER_20 || !curvb.curprim.fge) ) {
if( curvb.tex0.psm & 2 ) {
// converting from 32->16
shadertype = 3;
DXVEC4 v;
v.x = 16.0f / (float)curvb.tex0.tw;
v.y = 64.0f / (float)curvb.tex0.th;
v.z = 0.5f * v.x;
v.w = 0.5f * v.y;
SETCONSTF(GPU_TEXOFFSET0+context, v);
vpageoffset.x = -0.1f / 256.0f;
vpageoffset.y = -0.001f / 256.0f;
vpageoffset.z = -0.1f / ptextarg->fbh;
vpageoffset.w = ((ptextarg->psm&0x30)==0x30)?-1.0f:0.0f;
pd3dDevice->SetSamplerState(SAMP_BILINEARBLOCKS, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
pd3dDevice->SetSamplerState(SAMP_BILINEARBLOCKS, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
pd3dDevice->SetTexture(SAMP_BILINEARBLOCKS, ptexConv16to32);
}
else {
// converting from 16->32
WARN_LOG("ZeroGS: converting from 16 to 32bit RTs\n");
//shadetype = 4;
}
}
}
}
int psm = curvb.tex0.psm;
if( PSMT_ISCLUT(curvb.tex0.psm) ) psm = curvb.tex0.cpsm;
pps = LoadShadeEffect(shadertype, 0, curvb.curprim.fge, curvb.tex0.tcc && gs.texa.aem && (psm == PSMCT24 || psm == PSMCT16 || psm == PSMCT16S),
exactcolor, curvb.clamp, context);
LPD3DTEX ptexset = ptextarg == curvb.prndr ? ptextarg->ptexFeedback : ptextarg->ptex;
pd3dDevice->SetTexture(SAMP_MEMORY0+context, ptexset);
s_ptexCurSet[context] = ptexset;
if( curvb.tex1.mmag ) {
pd3dDevice->SetSamplerState(SAMP_MEMORY0+context, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
dwFilterOpts |= 1;
}
if( curvb.tex1.mmin ) {
pd3dDevice->SetSamplerState(SAMP_MEMORY0+context, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
dwFilterOpts |= 2;
}
DXVEC4 vTexDims;
vTexDims.x = curvb.tex0.tw / (float)ptextarg->fbw;
vTexDims.y = curvb.tex0.th / (float)ptextarg->targheight;
// look at the offset of tbp0 from fbp
if( curvb.tex0.tbp0 <= ptextarg->fbp ) {
vTexDims.z = 0;//-0.5f/(float)ptextarg->fbw;
vTexDims.w = 0;//0.2f/(float)ptextarg->fbh;
}
else {
u32 tbp0 = curvb.tex0.tbp0 >> 5; // align to a page
int blockheight = (ptextarg->psm&2) ? 64 : 32;
int ycoord = ((curvb.tex0.tbp0-ptextarg->fbp)/(32*(ptextarg->fbw>>6))) * blockheight;
int xcoord = (((curvb.tex0.tbp0-ptextarg->fbp)%(32*(ptextarg->fbw>>6)))) * 2;
xcoord += ptextarg->targoffx;
ycoord += ptextarg->targoffy;
vTexDims.z = (float)xcoord / (float)ptextarg->fbw;
vTexDims.w = (float)ycoord / (float)ptextarg->targheight;
}
if( shadertype == 4 ) {
vTexDims.z += 8.0f / (float)ptextarg->fbw;
}
SETCONSTF(GPU_TEXDIMS0+context, vTexDims);
// zoe2
if( (ptextarg->psm&0x30) == 0x30 ) {//&& (psm&2) == (ptextarg->psm&2) ) {
// target of zbuf has +1 added to it, don't do 16bit
vpageoffset.w = -1;
// DXVEC4 valpha2;
// valpha2.x = 1; valpha2.y = 0;
// valpha2.z = -1; valpha2.w = 0;
// SETCONSTF(GPU_TEXALPHA20+context, &valpha2);
}
SETCONSTF(GPU_PAGEOFFSET0+context, vpageoffset);
if( g_bSaveTex )
D3DXSaveTextureToFile("tex.tga", D3DXIFF_TGA, ptextarg == curvb.prndr ? ptextarg->ptexFeedback : ptextarg->ptex, NULL);
}
else {
// save the texture
#ifdef PCSX2_DEBUG
// CMemoryTarget* pmemtarg = g_MemTargs.GetMemoryTarget(curvb.tex0, 0);
// assert( curvb.pmemtarg == pmemtarg );
// if( PSMT_ISCLUT(curvb.tex0.psm) )
// assert( curvb.pmemtarg->ValidateClut(curvb.tex0) );
#endif
//#ifdef ZEROGS_CACHEDCLEAR
// if( !curvb.pmemtarg->ValidateTex(curvb.tex0, true) ) {
// CMemoryTarget* pmemtarg = g_MemTargs.GetMemoryTarget(curvb.tex0, 1);
// SetTexVariablesInt(context, GetTexFilter(curvb.tex1), curvb.tex0, pmemtarg, s_bForceTexFlush);
// vb[context].bVarsTexSync = TRUE;
// }
//#endif
if( g_bSaveTex ) {
if( g_bSaveTex == 1 ) SaveTex(&curvb.tex0, 1);
else SaveTex(&curvb.tex0, 0);
}
int psm = curvb.tex0.psm;
if( PSMT_ISCLUT(curvb.tex0.psm) ) psm = curvb.tex0.cpsm;
pps = LoadShadeEffect(0, GetTexFilter(curvb.tex1), curvb.curprim.fge,
curvb.tex0.tcc && gs.texa.aem && (psm == PSMCT24 || psm == PSMCT16 || psm == PSMCT16S),
exactcolor, curvb.clamp, context);
}
}
else pps = ppsRegular[curvb.curprim.fge+2*s_bWriteDepth];
pd3dDevice->SetPixelShader(pps);
BOOL bCanRenderStencil = g_bUpdateStencil && (curvb.prndr->psm&0xf) != 1 && !(curvb.frame.fbm&0x80000000);
if( g_GameSettings & GAME_NOSTENCIL )
bCanRenderStencil = 0;
if( s_bDestAlphaTest) {
SETRS(D3DRS_STENCILENABLE, bCanRenderStencil);
SETRS(D3DRS_STENCILPASS, D3DSTENCILOP_KEEP);
SETRS(D3DRS_STENCILFUNC, D3DCMP_ALWAYS);
}
else SETRS(D3DRS_STENCILENABLE, 0);
SETRS(D3DRS_ZWRITEENABLE, !curvb.zbuf.zmsk);
SETRS(D3DRS_ZENABLE, curtest.zte);
if( curtest.zte ) {
if( curtest.ztst > 1 ) g_nDepthUsed = 2;
if( (curtest.ztst == 2) ^ (g_nDepthBias != 0) ) {
g_nDepthBias = curtest.ztst == 2;
if( g_GameSettings & GAME_RELAXEDDEPTH )
SETRS(D3DRS_DEPTHBIAS, g_nDepthBias?FtoDW(0.00003f):FtoDW(0.0001f));
else
SETRS(D3DRS_DEPTHBIAS, g_nDepthBias?FtoDW(0.0003f):FtoDW(0.000015f));
}
SETRS(D3DRS_ZFUNC, g_dwZCmp[curtest.ztst]);
// if( curtest.ztst == 3 ) {
// // gequal
// if( s_vznorm.y == 0 ) {
// s_vznorm.y = 0.00001f;
// SETCONSTF(GPU_ZNORM, s_vznorm);
// }
// }
// else {
// if( s_vznorm.y > 0 ) {
// s_vznorm.y = 0;
// SETCONSTF(GPU_ZNORM, s_vznorm);
// }
// }
}
SETRS(D3DRS_ALPHATESTENABLE, curtest.ate&&USEALPHATESTING);
if( curtest.ate ) {
if( curtest.atst == 7 && curtest.aref == 255 ) {
// when it is at the very top, do a less than rather than not equal (gekibo2)
SETRS(D3DRS_ALPHAFUNC, D3DCMP_LESS);
SETRS(D3DRS_ALPHAREF, 255);
}
else {
SETRS(D3DRS_ALPHAFUNC, g_dwAlphaCmp[curtest.atst]);
SETRS(D3DRS_ALPHAREF, b2XAlphaTest ? min(255,2 * curtest.aref) : curtest.aref);
}
}
if( s_bWriteDepth ) {
//pd3dDevice->SetRenderTarget(0, curvb.prndr->psurf);
//pd3dDevice->SetRenderTarget(1, !curvb.zbuf.zmsk?curvb.pdepth->psurf:NULL);
if( bIndepWriteMasks )
SETRS(D3DRS_COLORWRITEENABLE1, !curvb.zbuf.zmsk?0xf:0);
else
pd3dDevice->SetRenderTarget(1, !curvb.zbuf.zmsk?curvb.pdepth->psurf:NULL);
}
if( curvb.curprim.abe )
SetAlphaVariables(curvb.alpha);
else
SETRS(D3DRS_ALPHABLENDENABLE, 0);
// needs to be before RenderAlphaTest
if( curvb.fba.fba || s_bDestAlphaTest ) {
if( gs.pabe || (curvb.fba.fba || bCanRenderStencil) && !(curvb.frame.fbm&0x80000000) ) {
RenderFBA(curvb);
}
}
u32 oldabe = curvb.curprim.abe;
if( gs.pabe ) {
//WARN_LOG("PBE!\n");
curvb.curprim.abe = 1;
SETRS(D3DRS_ALPHABLENDENABLE, USEALPHABLENDING);
}
if( curvb.curprim.abe && bNeedAlphaColor ) {
if( //bCanRenderStencil &&
(bNeedBlendFactorInAlpha || ((curtest.ate && curtest.atst>1) && (curtest.aref > 0x80))) ) {
// need special stencil processing for the alpha
RenderAlphaTest(curvb);
dwUsingSpecialTesting = 1;
}
// harvest fishing
DXVEC4 v = vAlphaBlendColor;// + DXVEC4(0,0,0,(curvb.test.atst==4 && curvb.test.aref>=128)?-0.004f:0);
if( exactcolor ) { v.y *= 255; v.w *= 255; }
SETCONSTF(GPU_ONECOLOR, v);
}
else {
// not using blending so set to defaults
DXVEC4 v = exactcolor ? DXVEC4(1, 510*255.0f/256.0f, 0, 0) : DXVEC4(1,2*255.0f/256.0f,0,0);
SETCONSTF(GPU_ONECOLOR, v);
}
if( s_bDestAlphaTest && bCanRenderStencil ) {
// if not 24bit and can write to high alpha bit
RenderStencil(curvb, dwUsingSpecialTesting);
}
else {
dwStencilRef = STENCIL_SPECIAL;
dwStencilMask = STENCIL_SPECIAL;
// setup the stencil to only accept the test pixels
if( dwUsingSpecialTesting ) {
SETRS(D3DRS_STENCILENABLE, TRUE);
SETRS(D3DRS_STENCILWRITEMASK, STENCIL_PIXELWRITE);
SETRS(D3DRS_STENCILMASK, STENCIL_SPECIAL);
SETRS(D3DRS_STENCILPASS, D3DSTENCILOP_REPLACE);
SETRS(D3DRS_STENCILFUNC, D3DCMP_EQUAL);
SETRS(D3DRS_STENCILREF, STENCIL_SPECIAL|STENCIL_PIXELWRITE);
}
}
#ifdef PCSX2_DEBUG
if( bDestAlphaColor == 1 ) {
WARN_LOG("dest alpha blending! manipulate alpha here\n");
}
#endif
if( bCanRenderStencil && gs.pabe ) {
// only render the pixels with alpha values >= 0x80
SETRS(D3DRS_STENCILREF, dwStencilRef|STENCIL_FBA);
SETRS(D3DRS_STENCILMASK, dwStencilMask|STENCIL_FBA);
if( !dwStencilMask ) SETRS(D3DRS_STENCILFUNC, D3DCMP_EQUAL);
}
// curvb.prndr->SetViewport();
// pd3dDevice->SetScissorRect(&curvb.prndr->scissorrect);
// SETRS(D3DRS_SCISSORTESTENABLE, TRUE);
if( !curvb.test.ate || curvb.test.atst > 0 ) {
DRAW();
}
if( gs.pabe ) {
// only render the pixels with alpha values < 0x80
SETRS(D3DRS_ALPHABLENDENABLE, 0);
SETRS(D3DRS_STENCILREF, dwStencilRef);
DXVEC4 v;
v.x = 1; v.y = 2; v.z = 0; v.w = 0;
if( exactcolor ) v.y *= 255;
SETCONSTF(GPU_ONECOLOR, v);
DRAW();
// reset
SETRS(D3DRS_STENCILMASK, dwStencilMask);
if( !dwStencilMask ) SETRS(D3DRS_STENCILFUNC, D3DCMP_ALWAYS);
}
// more work on alpha failure case
if( curtest.ate && curtest.atst != 1 && curtest.afail > 0 ) {
// need to reverse the test and disable some targets
SETRS(D3DRS_ALPHAFUNC, g_dwReverseAlphaCmp[curtest.atst]);
if( curtest.afail & 1 ) { // front buffer update only
if( curtest.afail == 3 ) // disable alpha
SETRS(D3DRS_COLORWRITEENABLE, D3DCOLORWRITEENABLE_BLUE|D3DCOLORWRITEENABLE_GREEN|D3DCOLORWRITEENABLE_RED);
SETRS(D3DRS_ZWRITEENABLE, FALSE);
if( s_bWriteDepth ) {
if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, 0);
else pd3dDevice->SetRenderTarget(1,NULL);
}
}
else {
// zbuffer update only
SETRS(D3DRS_COLORWRITEENABLE, 0);
}
if( gs.pabe && bCanRenderStencil ) {
// only render the pixels with alpha values >= 0x80
DXVEC4 v = vAlphaBlendColor;
if( exactcolor ) { v.y *= 255; v.w *= 255; }
SETCONSTF(GPU_ONECOLOR, v);
SETRS(D3DRS_ALPHABLENDENABLE, USEALPHABLENDING);
SETRS(D3DRS_STENCILREF, dwStencilRef|STENCIL_FBA);
SETRS(D3DRS_STENCILMASK, dwStencilMask|STENCIL_FBA);
if( !dwStencilMask ) SETRS(D3DRS_STENCILFUNC, D3DCMP_EQUAL);
}
// setup the stencil to only accept the test pixels
if( dwUsingSpecialTesting ) {
if( !s_bDestAlphaTest || !bCanRenderStencil ) {
SETRS(D3DRS_STENCILENABLE, FALSE);
}
}
// IDirect3DQuery9* pOcclusionQuery;
// DWORD numberOfPixelsDrawn;
//
// pd3dDevice->CreateQuery(D3DQUERYTYPE_OCCLUSION, &pOcclusionQuery);
//
// // Add an end marker to the command buffer queue.
// pOcclusionQuery->Issue(D3DISSUE_BEGIN);
DRAW();
// pOcclusionQuery->Issue(D3DISSUE_END);
// Force the driver to execute the commands from the command buffer.
// Empty the command buffer and wait until the GPU is idle.
// while(S_FALSE == pOcclusionQuery->GetData( &numberOfPixelsDrawn, sizeof(DWORD), D3DGETDATA_FLUSH ));
// SAFE_RELEASE(pOcclusionQuery);
if( gs.pabe ) {
// only render the pixels with alpha values < 0x80
SETRS(D3DRS_ALPHABLENDENABLE, 0);
SETRS(D3DRS_STENCILREF, dwStencilRef);
DXVEC4 v;
v.x = 1; v.y = 2; v.z = 0; v.w = 0;
if( exactcolor ) v.y *= 255;
SETCONSTF(GPU_ONECOLOR, v);
DRAW();
// reset
SETRS(D3DRS_STENCILMASK, dwStencilMask);
SETRS(D3DRS_ALPHABLENDENABLE, oldabe);
if( !dwStencilMask ) SETRS(D3DRS_STENCILFUNC, D3DCMP_ALWAYS);
}
// restore
if( (curtest.afail & 1) && !curvb.zbuf.zmsk ) {
SETRS(D3DRS_ZWRITEENABLE, TRUE);
if( s_bWriteDepth ) {
assert( curvb.pdepth != NULL);
if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, 0xf);
else pd3dDevice->SetRenderTarget(1,curvb.pdepth->psurf);
}
}
SETRS(D3DRS_COLORWRITEENABLE, s_dwColorWrite);
// not needed anymore since rest of ops concentrate on image processing
//SETRS(D3DRS_ALPHAFUNC, g_dwAlphaCmp[curtest.atst]);
}
if( dwUsingSpecialTesting ) {
// render the real alpha
SETRS(D3DRS_ALPHATESTENABLE, FALSE);
SETRS(D3DRS_COLORWRITEENABLE, D3DCOLORWRITEENABLE_ALPHA);
if( s_bWriteDepth ) {
if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, 0);
else pd3dDevice->SetRenderTarget(1,NULL);
}
SETRS(D3DRS_ZWRITEENABLE, FALSE);
SETRS(D3DRS_STENCILMASK, STENCIL_SPECIAL|STENCIL_PIXELWRITE);
SETRS(D3DRS_STENCILPASS, D3DSTENCILOP_KEEP);
SETRS(D3DRS_STENCILFUNC, D3DCMP_EQUAL);
SETRS(D3DRS_STENCILREF, STENCIL_SPECIAL|STENCIL_PIXELWRITE);
DXVEC4 v = DXVEC4(0,exactcolor ? 510.0f : 2.0f,0,0);
SETCONSTF(GPU_ONECOLOR, v);
DRAW();
// don't need to restore
}
if( s_bDestAlphaTest ) {
if( (s_dwColorWrite&D3DCOLORWRITEENABLE_ALPHA) ) {
if( curvb.fba.fba )
ProcessFBA(curvb);
else if( bCanRenderStencil )
// finally make sure all entries are 1 when the dest alpha >= 0x80 (if fba is 1, this is already the case)
ProcessStencil(curvb);
}
}
else if( (s_dwColorWrite&D3DCOLORWRITEENABLE_ALPHA) && curvb.fba.fba )
ProcessFBA(curvb);
if( bDestAlphaColor == 1 ) {
// need to reset the dest colors to their original counter parts
//WARN_LOG("Need to reset dest alpha color\n");
}
#ifdef PCSX2_DEBUG
if( g_bSaveFlushedFrame & 0xf ) {
char str[255];
sprintf(str, "frames\\frame%.4d.jpg", g_SaveFrameNum++);
if( (g_bSaveFlushedFrame & 2) )
D3DXSaveSurfaceToFile(str, D3DXIFF_JPG, curvb.prndr->psurf, NULL, NULL);
}
#endif
// clamp the final colors, when enabled ffx2 credits mess up
if( curvb.curprim.abe && bAlphaClamping && g_RenderFormat != D3DFMT_A8R8G8B8 && !(g_GameSettings&GAME_NOCOLORCLAMP) ) { // if !colclamp, skip
ResetAlphaVariables();
// if processing the clamping case, make sure can write to the front buffer
SETRS(D3DRS_STENCILENABLE, 0);
SETRS(D3DRS_ALPHABLENDENABLE, TRUE);
SETRS(D3DRS_ALPHATESTENABLE, FALSE);
SETRS(D3DRS_ZENABLE, FALSE);
SETRS(D3DRS_ZWRITEENABLE, FALSE);
SETRS(D3DRS_COLORWRITEENABLE, D3DCOLORWRITEENABLE_RED|D3DCOLORWRITEENABLE_BLUE|D3DCOLORWRITEENABLE_GREEN);
if( s_bWriteDepth ) {
if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, 0);
else pd3dDevice->SetRenderTarget(1,NULL);
}
pd3dDevice->SetPixelShader(ppsOne);
// (dest&0x7f)+0x80, blend factor for alpha is always 0x80
SETRS(D3DRS_DESTBLEND, D3DBLEND_ONE);
SETRS(D3DRS_SRCBLEND, D3DBLEND_ONE);
float f;
if( bAlphaClamping & 1 ) { // min
f = 0;
SETCONSTF(GPU_ONECOLOR, &f);
SETRS(D3DRS_BLENDOP, D3DBLENDOP_MAX);
DRAW();
}
// bios shows white screen
if( bAlphaClamping & 2 ) { // max
f = 1;
SETCONSTF(GPU_ONECOLOR, &f);
SETRS(D3DRS_BLENDOP, D3DBLENDOP_MIN);
DRAW();
}
if( !curvb.zbuf.zmsk ) {
SETRS(D3DRS_ZWRITEENABLE, TRUE);
if( s_bWriteDepth ) {
assert( curvb.pdepth != NULL );
if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, 0xf);
else pd3dDevice->SetRenderTarget(1,curvb.pdepth->psurf);
}
}
if( curvb.test.ate && USEALPHATESTING )
SETRS(D3DRS_ALPHATESTENABLE, TRUE);
SETRS(D3DRS_ZENABLE, curtest.zte);
}
if( dwFilterOpts ) {
// undo filter changes
if( dwFilterOpts & 1 ) pd3dDevice->SetSamplerState(SAMP_MEMORY0+context, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
if( dwFilterOpts & 2 ) pd3dDevice->SetSamplerState(SAMP_MEMORY0+context, D3DSAMP_MINFILTER, D3DTEXF_POINT);
}
// reset used textures
if( shadertype > 2 ) {
pd3dDevice->SetSamplerState(SAMP_BILINEARBLOCKS, D3DSAMP_ADDRESSU, D3DTADDRESS_WRAP);
pd3dDevice->SetSamplerState(SAMP_BILINEARBLOCKS, D3DSAMP_ADDRESSV, D3DTADDRESS_WRAP);
pd3dDevice->SetTexture(SAMP_BILINEARBLOCKS, ptexBilinearBlocks);
}
SETRS(D3DRS_CLIPPLANEENABLE, 0);
//#ifndef RELEASE_TO_PUBLIC
ppf += curvb.dwCount+0x100000;
//#endif
curvb.dwCurOff += POINT_BUFFERFLUSH;
SAFE_RELEASE(ptexRenderTargetCached);
g_MaxRenderedHeight = 0;
curvb.dwCount = 0;
//curvb.Lock();
curvb.curprim.abe = oldabe;
//if( oldabe ) SETRS(D3DRS_ALPHABLENDENABLE, USEALPHABLENDING);
if( conf.options & GSOPTION_WIREFRAME ) {
// always render first few geometry as solid
if( s_nWireframeCount > 0 ) {
SETRS(D3DRS_FILLMODE, D3DFILL_WIREFRAME);
--s_nWireframeCount;
}
}
}
// Draws every queued on-screen message as a line of drop-shadowed text and
// removes the messages whose time stamp has passed.
//
// Lines start at (25,15) and are stacked 15 pixels apart. A message is still
// drawn on the frame in which it expires; it is erased from listMsgs right
// after being drawn.
void ZeroGS::ProcessMessages()
{
    // nothing queued: skip the sprite Begin/End pair entirely
    if( listMsgs.empty() )
        return;

    pSprite->Begin(D3DXSPRITE_ALPHABLEND|D3DXSPRITE_SORT_TEXTURE);

    // The text is anchored at left/top (DT_LEFT|DT_NOCLIP). right/bottom are
    // zeroed so DrawText is never handed indeterminate values.
    RECT rctext = { 25, 15, 0, 0 };

    list<MESSAGE>::iterator it = listMsgs.begin();
    while( it != listMsgs.end() ) {
        // shadow pass: same string in opaque black, one pixel down and right
        rctext.left += 1;
        rctext.top += 1;
        pFont->DrawText(pSprite, it->str, -1, &rctext, DT_LEFT|DT_NOCLIP, 0xff000000);

        // foreground pass at the real position
        rctext.left -= 1;
        rctext.top -= 1;
        pFont->DrawText(pSprite, it->str, -1, &rctext, DT_LEFT|DT_NOCLIP, 0xffffff30);

        // next message goes one line further down
        rctext.top += 15;

        // Expired once the time stamp lies in the past. The difference is
        // compared as a signed value, which presumably keeps the test valid
        // when timeGetTime() wraps around -- confirm dwTimeStamp is 32-bit.
        if( (int)(it->dwTimeStamp - timeGetTime()) < 0 )
            it = listMsgs.erase(it);
        else
            ++it;
    }

    pSprite->End();
}
void ZeroGS::RenderCustom(float fAlpha)
{
if( !s_bBeginScene )
pd3dDevice->BeginScene();
pd3dDevice->SetDepthStencilSurface(psurfOrgDepth);
pd3dDevice->SetRenderTarget(0, psurfOrgTarg);
if( s_bWriteDepth )
pd3dDevice->SetRenderTarget(1, NULL);
SETRS(D3DRS_STENCILENABLE, 0);
SETRS(D3DRS_ZENABLE, FALSE);
SETRS(D3DRS_ZWRITEENABLE, FALSE);
SETRS(D3DRS_COLORWRITEENABLE, 0xf);
SETRS(D3DRS_ALPHABLENDENABLE, 0);
SETRS(D3DRS_ALPHATESTENABLE, 0);
SETRS(D3DRS_SCISSORTESTENABLE, 0);
// play custom animation
pd3dDevice->Clear(0, NULL, D3DCLEAR_TARGET|D3DCLEAR_ZBUFFER|D3DCLEAR_STENCIL, 0, 1, 0);
// tex coords
DXVEC4 v = DXVEC4(1, 1, 0, 0);
SETCONSTF(GPU_BITBLTTEX, v);
SETCONSTF(GPU_BITBLTPOS, v);
v.x = v.y = v.z = v.w = fAlpha;
SETCONSTF(GPU_ONECOLOR, v);
if( conf.options & GSOPTION_WIREFRAME ) SETRS(D3DRS_FILLMODE, D3DFILL_SOLID);
pd3dDevice->SetVertexShader(pvsBitBlt);
pd3dDevice->SetStreamSource(0, pvbRect, 0, sizeof(VertexGPU));
pd3dDevice->SetPixelShader(ppsBaseTexture);
// inside vb[0]'s target area, so render that region only
pd3dDevice->SetTexture(SAMP_FINAL, ptexLogo);
//pd3dDevice->SetSamplerState(SAMP_FINAL, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
//pd3dDevice->SetSamplerState(SAMP_FINAL, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
pd3dDevice->DrawPrimitive(D3DPT_TRIANGLESTRIP, 0, 2);
// restore
//pd3dDevice->SetSamplerState(SAMP_FINAL, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
//pd3dDevice->SetSamplerState(SAMP_FINAL, D3DSAMP_MINFILTER, D3DTEXF_POINT);
if( conf.options & GSOPTION_WIREFRAME ) SETRS(D3DRS_FILLMODE, D3DFILL_WIREFRAME);
ProcessMessages();
pd3dDevice->EndScene();
s_bBeginScene = FALSE;
pd3dDevice->Present(NULL, NULL, NULL, NULL);
SETRS(D3DRS_SCISSORTESTENABLE, TRUE);
SETRS(D3DRS_STENCILENABLE, 1);
if( icurctx >= 0 ) vb[icurctx].bSyncVars = 0;
}
// Rescales the transform in v so that an image of aspect dispwidth:dispheight
// keeps its proportions inside the width x height output.
//
// v          - modified in place. Either the y/w pair or the x/z pair is
//              scaled, depending on which dimension is the limiting one.
//              (Presumably x/y are scales and z/w offsets, as passed for
//              GPU_BITBLTPOS -- confirm against the vertex shader.)
// dispwidth  - reference display width
// dispheight - reference display height
void ZeroGS::AdjustTransToAspect(DXVEC4& v, int dispwidth, int dispheight)
{
    float whole; // receives modf's integer part; never read

    if( dispwidth * height <= dispheight * width ) {
        // limited by height: shrink horizontally
        float ratio = ((float)height / (float)dispheight) / ((float)width / (float)dispwidth);
        // drop the fractional part of ratio*width so it becomes a whole number
        ratio -= modf(ratio*width, &whole)/(float)width;
        v.x *= ratio;
        v.z *= ratio;
        return;
    }

    // limited by width: shrink vertically by the change in ratio
    const float ratio = ((float)width / (float)dispwidth) / ((float)height / (float)dispheight);
    v.y *= ratio;
    v.w *= ratio;

    // scanlines mess up when not aligned right, so nudge both values upward
    // by an amount derived from the fractional part of value*height/2
    v.y += (1-modf(v.y*height/2+0.05f, &whole))*2.0f/(float)height;
    v.w += (1-modf(v.w*height/2+0.05f, &whole))*2.0f/(float)height;
}
// Tries to bring a lost device back. Does nothing unless g_bIsLost is set.
// If the device Reset succeeds, the lost flag is cleared and
// ChangeDeviceSize is called with the current width/height. If Reset fails,
// g_bIsLost stays set.
void ZeroGS::Restore()
{
    // Reset is only attempted while the device is flagged as lost
    if( g_bIsLost && SUCCEEDED(pd3dDevice->Reset(&d3dpp)) ) {
        g_bIsLost = 0;

        // handle lost states
        ZeroGS::ChangeDeviceSize(width, height);
    }
}
void ZeroGS::RenderCRTC(int interlace)
{
if( pd3dDevice == NULL ) {
return;
}
if( g_bIsLost ) return;
#ifdef RELEASE_TO_PUBLIC
if( g_nRealFrame < 80 ) {
RenderCustom( min(1.0f, 2.0f - (float)g_nRealFrame / 40.0f) );
if( g_nRealFrame == 79 )
SAFE_RELEASE(ptexLogo);
return;
}
#endif
Flush(0);
Flush(1);
// frame skipping
if( g_nFrameRender > 0 ) {
if( g_nFrameRender < 8 ) {
g_nFrameRender++;
if( g_nFrameRender <= 3 ) {
g_nFramesSkipped++;
return;
}
}
}
else {
if( g_nFrameRender < -1 ) {
g_nFramesSkipped++;
return;
}
g_nFrameRender--;
}
if( g_bSaveFrame ) {
if( vb[0].prndr != NULL ) D3DXSaveSurfaceToFile("frame1.tga", D3DXIFF_TGA, vb[0].prndr->psurf, NULL, NULL);
if( vb[1].prndr != NULL && vb[0].prndr != vb[1].prndr ) D3DXSaveSurfaceToFile("frame2.tga", D3DXIFF_TGA, vb[1].prndr->psurf, NULL, NULL);
else DeleteFile("frame2.tga");
}
if( s_RangeMngr.ranges.size() > 0 )
FlushTransferRanges(NULL);
if( icurctx >= 0 && vb[icurctx].bVarsSetTarg ) { // check if anything rendered
pd3dDevice->SetRenderTarget(0, psurfOrgTarg);
pd3dDevice->SetRenderTarget(1, NULL);
pd3dDevice->SetDepthStencilSurface(psurfOrgDepth);
}
D3DVIEWPORT9 view;
view.Width = width;
view.Height = height;
view.X = 0;
view.Y = 0;
view.MinZ = 0;
view.MaxZ = 1.0f;
pd3dDevice->SetViewport(&view);
//g_GameSettings |= GAME_VSSHACK|GAME_FULL16BITRES|GAME_NODEPTHRESOLVE;
//s_bWriteDepth = TRUE;
g_SaveFrameNum = 0;
g_bSaveFlushedFrame = 1;
// static int counter = 0;
// counter++;
// reset fba after every frame
//if( !(g_GameSettings&GAME_NOFBARESET) ) {
vb[0].fba.fba = 0;
vb[1].fba.fba = 0;
//}
u32 bInterlace = SMODE2->INT && SMODE2->FFMD && (conf.interlace<2);
// if interlace, only clear every other vsync
if(!bInterlace ) {
u32 color = D3DCOLOR_ARGB(0, BGCOLOR->R, BGCOLOR->G, BGCOLOR->B);
pd3dDevice->Clear(0, NULL, D3DCLEAR_TARGET|D3DCLEAR_STENCIL, color, 1, 0);
}
if( !s_bBeginScene ) {
pd3dDevice->BeginScene();
s_bBeginScene = TRUE;
}
pd3dDevice->SetVertexShader(pvsBitBlt);
pd3dDevice->SetStreamSource(0, pvbRect, 0, sizeof(VertexGPU));
if( conf.options & GSOPTION_WIREFRAME ) SETRS(D3DRS_FILLMODE, D3DFILL_SOLID);
SETRS(D3DRS_ZENABLE, 0);
SETRS(D3DRS_ZWRITEENABLE, 0);
SETRS(D3DRS_COLORWRITEENABLE, 0xf);
SETRS(D3DRS_ALPHABLENDENABLE, 0);
SETRS(D3DRS_ALPHATESTENABLE, 0);
SETRS(D3DRS_SCISSORTESTENABLE, 0);
SETRS(D3DRS_STENCILENABLE, 0);
BOOL bUsingStencil = 0;
if( bInterlace ) g_PrevBitwiseTexX = -1; // reset since will be using
tex0Info dispinfo[2];
for(int i = 0; i < 2; ++i) {
if( !(*(u32*)(PMODE) & (1<<i)) ) {
dispinfo[i].tw = 0;
dispinfo[i].th = 0;
continue;
}
GSRegDISPFB* pfb = i ? DISPFB2 : DISPFB1;
GSRegDISPLAY* pd = i ? DISPLAY2 : DISPLAY1;
int magh = pd->MAGH+1;
int magv = pd->MAGV+1;
dispinfo[i].tbp0 = pfb->FBP << 5;
dispinfo[i].tbw = pfb->FBW << 6;
dispinfo[i].tw = (pd->DW + 1) / magh;
dispinfo[i].th = (pd->DH + 1) / magv;
dispinfo[i].psm = pfb->PSM;
// hack!!
// 2 * dispinfo[i].tw / dispinfo[i].th <= 1, metal slug 4
if( bInterlace && 2 * dispinfo[i].tw / dispinfo[i].th <= 1 && !(g_GameSettings&GAME_INTERLACE2X) ) {
dispinfo[i].th >>= 1;
}
}
//int dispwidth = max(dispinfo[0].tw, dispinfo[1].tw), dispheight = max(dispinfo[0].th, dispinfo[1].th);
// hack!, CMOD != 3, gradius
// if( SMODE2->INT && SMODE2->FFMD && SMODE1->CMOD == 3 && dispwidth <= 320)
// dispwidth *= 2;
// hack! makai
//if( !bInterlace && dispheight * 2 < dispwidth ) dispheight *= 2;
// start from the last circuit
for(int i = !PMODE->SLBG; i >= 0; --i) {
tex0Info& texframe = dispinfo[i];
if( texframe.th <= 1 )
continue;
GSRegDISPFB* pfb = i ? DISPFB2 : DISPFB1;
GSRegDISPLAY* pd = i ? DISPLAY2 : DISPLAY1;
DXVEC4 v, valpha;
if( bInterlace ) {
texframe.th >>= 1;
// interlace mode
pd3dDevice->SetTexture(SAMP_INTERLACE, CreateInterlaceTex(2*texframe.th));
if( interlace == (conf.interlace&1) ) {
// pass if odd
valpha.z = 1.0f;
valpha.w = -0.4999f;
}
else {
// pass if even
valpha.z = -1.0f;
valpha.w = 0.5001f;
}
}
else {
if( SMODE2->INT && SMODE2->FFMD ) {
texframe.th >>= 1;
}
// always pass interlace test
valpha.z = 0;
valpha.w = 1;
}
int bpp = 4;
if( texframe.psm == 0x12 ) bpp = 3;
else if( texframe.psm & 2 ) bpp = 2;
// get the start and end addresses of the buffer
int start, end;
GetRectMemAddress(start, end, texframe.psm, 0, 0, texframe.tw, texframe.th, texframe.tbp0, texframe.tbw);
if( i == 0 ) {
// setup right blending
SETRS(D3DRS_ALPHABLENDENABLE, USEALPHABLENDING);
SETRS(D3DRS_BLENDOP, D3DBLENDOP_ADD);
SETRS(D3DRS_BLENDOPALPHA, D3DBLENDOP_ADD);
if( PMODE->MMOD ) {
SETRS(D3DRS_BLENDFACTOR, D3DCOLOR_ARGB(0x80, PMODE->ALP, PMODE->ALP, PMODE->ALP));
SETRS(D3DRS_SRCBLEND, D3DBLEND_BLENDFACTOR);
SETRS(D3DRS_DESTBLEND, D3DBLEND_INVBLENDFACTOR);
}
else {
SETRS(D3DRS_SRCBLEND, D3DBLEND_SRCALPHA);
SETRS(D3DRS_DESTBLEND, D3DBLEND_INVSRCALPHA);
}
SETRS(D3DRS_SRCBLENDALPHA, PMODE->AMOD ? D3DBLEND_ZERO : D3DBLEND_ONE);
SETRS(D3DRS_DESTBLENDALPHA, PMODE->AMOD? D3DBLEND_ONE : D3DBLEND_ZERO);
}
if( bUsingStencil ) {
SETRS(D3DRS_STENCILWRITEMASK, 1<<i);
SETRS(D3DRS_STENCILMASK, 1<<i);
}
if( texframe.psm == 0x12 ) {
WARN_LOG("CRTC24!!!\n");
// assume that data is already in ptexMem (do Resolve?)
pd3dDevice->SetPixelShader(ppsCRTC24[bInterlace]);
valpha.x = 0;
valpha.y = 1;
SETCONSTF(GPU_ONECOLOR, valpha);
pd3dDevice->DrawPrimitive(D3DPT_TRIANGLESTRIP, 0, 2);
continue;
}
// first render the current render targets, then from ptexMem
if( texframe.psm == 1 ) {
valpha.x = 0;
valpha.y = 1;
}
else {
valpha.x = 1;
valpha.y = 0;
}
SETCONSTF(GPU_ONECOLOR, valpha);
BOOL bSkip = 0;
BOOL bResolveTargs = 1;
//s_mapFrameHeights[s_nCurFrameMap][texframe.tbp0] = texframe.th;
list<CRenderTarget*> listTargs;
s_RTs.GetTargs(start, end, listTargs);
for(list<CRenderTarget*>::iterator it = listTargs.begin(); it != listTargs.end(); ) {
CRenderTarget* ptarg = *it;
if( ptarg->fbw == texframe.tbw && !(ptarg->status&CRenderTarget::TS_NeedUpdate) && ((256/bpp)*(texframe.tbp0-ptarg->fbp))%texframe.tbw == 0 ) {
if( ptarg->fbp != texframe.tbp0 ) {
// look for a better target (metal slug 5)
list<CRenderTarget*>::iterator itbetter;
for(itbetter = listTargs.begin(); itbetter != listTargs.end(); ++itbetter ) {
if( (*itbetter)->fbp == texframe.tbp0 )
break;
}
if( itbetter != listTargs.end() ) {
it = listTargs.erase(it);
continue;
}
}
static int sindex = 0;
char strtemp[25];
sprintf(strtemp, "frames/frame%d.jpg", sindex++);
// D3DXSaveSurfaceToFile(strtemp, D3DXIFF_JPG, ptarg->psurf, NULL, NULL);
// if( g_bSaveFinalFrame )
// D3DXSaveSurfaceToFile("frame1.tga", D3DXIFF_TGA, ptarg->psurf, NULL, NULL);
int dby = pfb->DBY;
int movy = 0;
// determine the rectangle to render
if( ptarg->fbp < texframe.tbp0 ) {
dby += (256/bpp)*(texframe.tbp0-ptarg->fbp)/texframe.tbw;
}
else if( ptarg->fbp > texframe.tbp0 ) {
dby -= (256/bpp)*(ptarg->fbp-texframe.tbp0)/texframe.tbw;
if( dby < 0 ) {
movy = -dby;
dby = 0;
}
}
int dh = min(ptarg->fbh - dby, texframe.th-movy);
if( dh >= 64 ) {
if( ptarg->fbh - dby < texframe.th-movy && !bUsingStencil ) {
if( !bUsingStencil ) {
pd3dDevice->Clear(0, NULL, D3DCLEAR_STENCIL, 0, 1, 0);
}
bUsingStencil = 1;
SETRS(D3DRS_STENCILENABLE, TRUE);
SETRS(D3DRS_STENCILPASS, D3DSTENCILOP_REPLACE);
SETRS(D3DRS_STENCILFUNC, D3DCMP_NOTEQUAL);
SETRS(D3DRS_STENCILREF, 3);
SETRS(D3DRS_STENCILWRITEMASK, 1<<i);
SETRS(D3DRS_STENCILMASK, 1<<i);
}
float fiw = 1.0f / texframe.tbw;
float fih = 1.0f / ptarg->fbh;
// tex coords
v = DXVEC4(fiw*(float)texframe.tw, fih*(float)(dh), fiw*(float)(pfb->DBX), fih*((float)dby-0.5f));
SETCONSTF(GPU_BITBLTTEX, v);
// dest rect
v.x = 1;
v.y = dh/(float)texframe.th;
v.z = 0;
v.w = 1-v.y;
if( movy > 0 )
v.w -= movy/(float)texframe.th;
if (bInterlace && interlace == (conf.interlace&1) ) {
// move down by 1 pixel
v.w += 1.0f / (float)dh;
}
AdjustTransToAspect(v, (conf.options&GSOPTION_WIDESCREEN)?960:640, (conf.options&GSOPTION_WIDESCREEN)?540:480);
SETCONSTF(GPU_BITBLTPOS, v);
// use GPU_INVTEXDIMS to store inverse texture dims
v.x = fiw;
v.y = fih;
v.z = 0;
SETCONSTF(GPU_INVTEXDIMS, v);
// inside vb[0]'s target area, so render that region only
pd3dDevice->SetTexture(SAMP_FINAL, ptarg->ptex);
pd3dDevice->SetPixelShader(ppsCRTCTarg[bInterlace]);
pd3dDevice->DrawPrimitive(D3DPT_TRIANGLESTRIP, 0, 2);
if( abs(dh - (int)texframe.th) <= 1 ) {
bSkip = 1;
break;
}
if( abs(dh - (int)ptarg->fbh) <= 1 ) {
it = listTargs.erase(it);
continue;
}
}
}
++it;
}
if( !bSkip ) {
for(list<CRenderTarget*>::iterator it = listTargs.begin(); it != listTargs.end(); ++it)
(*it)->Resolve();
// context has to be 0
SetTexVariablesInt(0, 2, texframe, g_MemTargs.GetMemoryTarget(texframe, 1), 1);
if( g_bSaveFinalFrame )
SaveTex(&texframe, g_bSaveFinalFrame-1>0);
// finally render from the memory (note that the stencil buffer will keep previous regions)
v = DXVEC4(1,1,0,0);
if (bInterlace && interlace == (conf.interlace)) {
// move down by 1 pixel
v.w += 1.0f / (float)texframe.th;
}
AdjustTransToAspect(v, (conf.options&GSOPTION_WIDESCREEN)?960:640, (conf.options&GSOPTION_WIDESCREEN)?540:480);
SETCONSTF(GPU_BITBLTPOS, v);
v = DXVEC4(texframe.tw,texframe.th,-0.5f,-0.5f);
SETCONSTF(GPU_BITBLTTEX, v);
// use GPU_INVTEXDIMS to store inverse texture dims
v.x = 1.0f / (float)texframe.tw;
v.y = 1.0f / (float)texframe.th;
v.z = 0;//-0.5f * v.x;
v.w = -0.5f * v.y;
SETCONSTF(GPU_INVTEXDIMS, v);
pd3dDevice->SetPixelShader(ppsCRTC[bInterlace]);
pd3dDevice->DrawPrimitive(D3DPT_TRIANGLESTRIP, 0, 2);
}
}
if(1) {// || !bInterlace) {
s_bBeginScene = FALSE;
ProcessMessages();
if( g_bMakeSnapshot ) {
RECT rctext;
char str[64];
rctext.left = 200; rctext.top = 15;
sprintf(str, "ZeroGS %d.%d.%d - %.1f fps %s", revision, build, minor, fFPS, s_frameskipping?" - frameskipping":"");
pSprite->Begin(D3DXSPRITE_ALPHABLEND|D3DXSPRITE_SORT_TEXTURE);
rctext.left += 1;
rctext.top += 1;
pFont->DrawText(pSprite, str, -1, &rctext, DT_LEFT|DT_NOCLIP, 0xff000000);
rctext.left -= 1;
rctext.top -= 1;
pFont->DrawText(pSprite, str, -1, &rctext, DT_LEFT|DT_NOCLIP, 0xffc0ffff);
pSprite->End();
}
if( g_bDisplayFPS ) {
RECT rctext;
char str[64];
rctext.left = 10; rctext.top = 10;
sprintf(str, "%.1f fps", fFPS);
pSprite->Begin(D3DXSPRITE_ALPHABLEND|D3DXSPRITE_SORT_TEXTURE);
rctext.left += 1;
rctext.top += 1;
pFont->DrawText(pSprite, str, -1, &rctext, DT_LEFT|DT_NOCLIP, 0xff000000);
rctext.left -= 1;
rctext.top -= 1;
pFont->DrawText(pSprite, str, -1, &rctext, DT_LEFT|DT_NOCLIP, 0xffc0ffff);
pSprite->End();
}
pd3dDevice->EndScene();
if( pd3dDevice->Present(NULL, NULL, NULL, NULL) == D3DERR_DEVICELOST ) {
// device is lost, need to recreate
DEBUG_LOG("device lost\n");
g_bIsLost = TRUE;
Reset();
return;
}
if( conf.options & GSOPTION_WIREFRAME ) {
// clear all targets
s_nWireframeCount = 1;
}
if( g_bMakeSnapshot ) {
if( SUCCEEDED(D3DXSaveSurfaceToFile(strSnapshot != ""?strSnapshot.c_str():"temp.jpg", (conf.options&GSOPTION_BMPSNAP)?D3DXIFF_BMP:D3DXIFF_JPG, psurfOrgTarg, NULL, NULL)) ) {
char str[255];
sprintf(str, "saved %s\n", strSnapshot.c_str());
AddMessage(str, 500);
}
g_bMakeSnapshot = 0;
}
if( s_avicapturing ) {
CaptureFrame();
}
if( s_nNewWidth >= 0 && s_nNewHeight >= 0 && !g_bIsLost ) {
Reset();
ChangeDeviceSize(s_nNewWidth, s_nNewHeight);
s_nNewWidth = s_nNewHeight = -1;
}
// switch the fbp lists
// s_nCurFBPSet ^= 1;
// s_setFBP[s_nCurFBPSet].clear();
//s_nCurFrameMap ^= 1;
//s_mapFrameHeights[s_nCurFrameMap].clear();
}
pd3dDevice->SetTexture(SAMP_FINAL, NULL); // d3d debug complains if not
g_MemTargs.DestroyCleared();
for(list<LPD3DTEX>::iterator it = s_vecTempTextures.begin(); it != s_vecTempTextures.end(); ++it)
(*it)->Release();
s_vecTempTextures.clear();
if( EXTWRITE->WRITE&1 ) {
WARN_LOG("EXTWRITE\n");
ExtWrite();
EXTWRITE->WRITE = 0;
}
if( conf.options & GSOPTION_WIREFRAME ) SETRS(D3DRS_FILLMODE, D3DFILL_WIREFRAME);
SETRS(D3DRS_SCISSORTESTENABLE, TRUE);
if( icurctx >= 0 ) {
vb[icurctx].bVarsSetTarg = FALSE;
vb[icurctx].bVarsTexSync = FALSE;
vb[0].bVarsTexSync = FALSE;
}
// statistics
if( s_nWriteDepthCount > 0 ) {
assert( conf.mrtdepth );
if( --s_nWriteDepthCount <= 0 ) {
s_bWriteDepth = FALSE;
}
}
if( s_nWriteDestAlphaTest > 0 ) {
if( --s_nWriteDestAlphaTest <= 0 ) {
s_bDestAlphaTest = FALSE;
}
}
if( g_GameSettings & GAME_AUTORESET ) {
s_nResolveCounts[s_nCurResolveIndex] = s_nResolved;
s_nCurResolveIndex = (s_nCurResolveIndex+1)%ArraySize(s_nResolveCounts);
int total = 0;
for(int i = 0; i < ArraySize(s_nResolveCounts); ++i) total += s_nResolveCounts[i];
if( total / ArraySize(s_nResolveCounts) > 3 ) {
if( s_nLastResolveReset > (int)(fFPS * 8) ) {
// reset
DEBUG_LOG("ZeroGS: video mem reset\n");
s_nLastResolveReset = 0;
memset(s_nResolveCounts, 0, sizeof(s_nResolveCounts));
s_RTs.ResolveAll();
s_RTs.Destroy();
s_DepthRTs.ResolveAll();
s_DepthRTs.Destroy();
vb[0].prndr = NULL; vb[0].pdepth = NULL; vb[0].bNeedFrameCheck = 1; vb[0].bNeedZCheck = 1;
vb[1].prndr = NULL; vb[1].pdepth = NULL; vb[1].bNeedFrameCheck = 1; vb[1].bNeedZCheck = 1;
}
}
s_nLastResolveReset++;
}
if( s_nResolved > 8 ) s_nResolved = 2;
else if( s_nResolved > 0 ) --s_nResolved;
if( g_nDepthUsed > 0 ) --g_nDepthUsed;
s_ClutResolve = 0;
s_PSM8Resolve = 0;
g_nDepthUpdateCount = 0;
maxmin = 608;
}
//////////////////////////
// Internal Definitions //
//////////////////////////
// Writes the GS depth value gsz into the GPU vertex p.
// When the context's primitive z mask is 0xffff the value is limited to 0xffff,
// otherwise it is stored unchanged.
__forceinline void MOVZ(VertexGPU *p, u32 gsz, const VB& curvb)
{
	if( curvb.zprimmask == 0xffff )
		p->z = min((u32)0xffff, gsz);
	else
		p->z = gsz;
}
// Converts the fog value of the GS vertex gsf into the GPU vertex format:
// the value is shifted left by 7 bits and the low 7 bits are filled with ones.
__forceinline void MOVFOG(VertexGPU *p, Vertex gsf)
{
	const int shiftedFog = (s16)gsf.f << 7;
	p->f = shiftedFog | 0x7f;
}
// Fills the GPU vertex p from gs.gsvertex[Index] using the state of curvb.
//  - x/y: GS coordinate minus the context offset, halved, kept to 16 bits
//  - f/z: fog and depth in GPU format
//  - rgba: per-vertex color when prim->iip is set, otherwise the current gs.rgba
//  - s/t/q: only written when texturing (prim->tme) is enabled
__forceinline void SET_VERTEX(VertexGPU *p, int Index, const VB& curvb)
{
	const Vertex& src = gs.gsvertex[Index];

	const int relX = (int)src.x - curvb.offset.x;
	const int relY = (int)src.y - curvb.offset.y;
	p->x = (relX>>1)&0xffff;
	p->y = (relY>>1)&0xffff;
	// (a disabled variant kept the low 15 bits and stored the sign in bit 15)

	p->f = ((s16)src.f<<7)|0x7f;
	MOVZ(p, src.z, curvb);

	if( prim->iip )
		p->rgba = src.rgba;
	else
		p->rgba = gs.rgba;

	// game hack: when the color part is black, the whole word (alpha included) is cleared
	if( (g_GameSettings & GAME_TEXAHACK) && !(p->rgba&0xffffff) )
		p->rgba = 0;

	if( !prim->tme )
		return;

	if( prim->fst )
	{
		// integer UV coordinates are scaled by the per-context inverse texture size
		p->s = (float)src.u * fiTexWidth[prim->ctxt];
		p->t = (float)src.v * fiTexHeight[prim->ctxt];
		p->q = 1;
	}
	else
	{
		// STQ coordinates are passed through unchanged
		p->s = src.s;
		p->t = src.t;
		p->q = src.q;
	}
}
// Debug helper: prints one GPU vertex through the printf-style sink `fn`.
//   fn   - callable taking a format string followed by the values
//   vert - vertex expression exposing x, y, z, f, rgba, s, t and q
//   id   - index of the vertex inside the primitive ('*' marks index 0)
// Expands names from the call site: `prim` and `Clamp`.
// The macro ends on the closing brace; it must not carry a trailing line
// continuation, otherwise the next source line becomes part of the macro.
#define OUTPUT_VERT(fn, vert, id) { \
	fn("%c%d(%d): xyzf=(%4d,%4d,0x%x,%3d), rgba=0x%8.8x, stq = (%2.5f,%2.5f,%2.5f)\n", id==0?'*':' ', id, prim->prim, vert.x/8, vert.y/8, vert.z, vert.f/128, \
		vert.rgba, Clamp(vert.s, -10, 10), Clamp(vert.t, -10, 10), Clamp(vert.q, -10, 10)); \
}
void ZeroGS::KickPoint()
{
assert( gs.primC >= 1 );
VB& curvb = vb[prim->ctxt];
if (curvb.bNeedTexCheck) curvb.FlushTexData();
if( !(g_GameSettings&GAME_DOPARALLELCTX) && vb[!prim->ctxt].dwCount > 0 && vb[prim->ctxt].gsfb.fbp == vb[!prim->ctxt].gsfb.fbp )
{
assert( vb[prim->ctxt].dwCount == 0 );
Flush(!prim->ctxt);
}
if( curvb.dwCount >= POINT_BUFFERFLUSH)
Flush(prim->ctxt);
curvb.Lock();
int last = (gs.primIndex+2)%ArraySize(gs.gsvertex);
VertexGPU* p = curvb.pbuf+curvb.dwCount;
SET_VERTEX(&p[0], last, curvb);
curvb.dwCount++;
#ifdef PRIM_LOG
OUTPUT_VERT(PRIM_LOG, p[0], 0);
#endif
}
void ZeroGS::KickLine()
{
assert( gs.primC >= 2 );
VB& curvb = vb[prim->ctxt];
if( curvb.bNeedTexCheck )
curvb.FlushTexData();
if( !(g_GameSettings&GAME_DOPARALLELCTX) && vb[!prim->ctxt].dwCount > 0 && vb[prim->ctxt].gsfb.fbp == vb[!prim->ctxt].gsfb.fbp )
{
assert( vb[prim->ctxt].dwCount == 0 );
Flush(!prim->ctxt);
}
if( curvb.dwCount >= POINT_BUFFERFLUSH/2 )
Flush(prim->ctxt);
curvb.Lock();
int next = (gs.primIndex+1)%ArraySize(gs.gsvertex);
int last = (gs.primIndex+2)%ArraySize(gs.gsvertex);
VertexGPU* p = curvb.pbuf+curvb.dwCount*2;
SET_VERTEX(&p[0], next, curvb);
SET_VERTEX(&p[1], last, curvb);
curvb.dwCount++;
#ifdef PRIM_LOG
OUTPUT_VERT(PRIM_LOG, p[0], 0);
OUTPUT_VERT(PRIM_LOG, p[1], 1);
#endif
}
void ZeroGS::KickTriangle()
{
assert( gs.primC >= 3 );
VB& curvb = vb[prim->ctxt];
if (curvb.bNeedTexCheck) curvb.FlushTexData();
if( !(g_GameSettings&GAME_DOPARALLELCTX) && vb[!prim->ctxt].dwCount > 0 && vb[prim->ctxt].gsfb.fbp == vb[!prim->ctxt].gsfb.fbp )
{
assert( vb[prim->ctxt].dwCount == 0 );
Flush(!prim->ctxt);
}
if( curvb.dwCount >= POINT_BUFFERFLUSH/3 )
Flush(prim->ctxt);
curvb.Lock();
VertexGPU* p = curvb.pbuf+curvb.dwCount*3;
SET_VERTEX(&p[0], 0, curvb);
SET_VERTEX(&p[1], 1, curvb);
SET_VERTEX(&p[2], 2, curvb);
curvb.dwCount++;
#ifdef PRIM_LOG
OUTPUT_VERT(PRIM_LOG, p[0], 0);
OUTPUT_VERT(PRIM_LOG, p[1], 1);
OUTPUT_VERT(PRIM_LOG, p[2], 2);
#endif
}
void ZeroGS::KickTriangleFan()
{
assert( gs.primC >= 3 );
VB& curvb = vb[prim->ctxt];
if (curvb.bNeedTexCheck) curvb.FlushTexData();
if( !(g_GameSettings&GAME_DOPARALLELCTX) && vb[!prim->ctxt].dwCount > 0 && vb[prim->ctxt].gsfb.fbp == vb[!prim->ctxt].gsfb.fbp )
{
assert( vb[prim->ctxt].dwCount == 0 );
Flush(!prim->ctxt);
}
if( curvb.dwCount >= POINT_BUFFERFLUSH/3 )
Flush(prim->ctxt);
curvb.Lock();
VertexGPU* p = curvb.pbuf+curvb.dwCount*3;
SET_VERTEX(&p[0], 0, curvb);
SET_VERTEX(&p[1], 1, curvb);
SET_VERTEX(&p[2], 2, curvb);
curvb.dwCount++;
// add 1 to skip the first vertex
if( gs.primIndex == gs.nTriFanVert )
gs.primIndex = (gs.primIndex+1)%ArraySize(gs.gsvertex);
#ifdef PRIM_LOG
OUTPUT_VERT(PRIM_LOG, p[0], 0);
OUTPUT_VERT(PRIM_LOG, p[1], 1);
OUTPUT_VERT(PRIM_LOG, p[2], 2);
#endif
}
// Builds the GPU vertex p from gs.gsvertex[next], then overrides its depth and
// fog with the values of v, so that several vertices can share v's z and fog.
__forceinline void SetKickVertex(VertexGPU *p, Vertex v, int next, const VB& curvb)
{
	// position, color and texture coordinates come from the indexed vertex
	SET_VERTEX(p, next, curvb);

	// depth and fog are taken from v instead
	MOVZ(p, v.z, curvb);
	MOVFOG(p, v);
}
void ZeroGS::KickSprite()
{
assert( gs.primC >= 2 );
VB& curvb = vb[prim->ctxt];
if( curvb.bNeedTexCheck )
curvb.FlushTexData();
if( !(g_GameSettings&GAME_DOPARALLELCTX) && vb[!prim->ctxt].dwCount > 0 && vb[prim->ctxt].gsfb.fbp == vb[!prim->ctxt].gsfb.fbp )
{
assert( vb[prim->ctxt].dwCount == 0 );
Flush(!prim->ctxt);
}
if (curvb.dwCount >= POINT_BUFFERFLUSH/3) Flush(prim->ctxt);
curvb.Lock();
int next = (gs.primIndex+1)%ArraySize(gs.gsvertex);
int last = (gs.primIndex+2)%ArraySize(gs.gsvertex);
// sprite is too small and AA shows lines (tek4)
if( s_AAx )
{
gs.gsvertex[last].x += 4;
if (s_AAy) gs.gsvertex[last].y += 4;
}
// might be bad sprite (KH dialog text)
//if( gs.gsvertex[next].x == gs.gsvertex[last].x || gs.gsvertex[next].y == gs.gsvertex[last].y )
// return;
VertexGPU* p = curvb.pbuf+curvb.dwCount*3;
SetKickVertex(&p[0], gs.gsvertex[last], next, curvb);
SetKickVertex(&p[3], gs.gsvertex[last], next, curvb);
SetKickVertex(&p[1], gs.gsvertex[last], last, curvb);
SetKickVertex(&p[4], gs.gsvertex[last], last, curvb);
if (g_MaxRenderedHeight < p[0].y) g_MaxRenderedHeight = p[0].y;
if (g_MaxRenderedHeight < p[1].y) g_MaxRenderedHeight = p[1].y;
SetKickVertex(&p[2], gs.gsvertex[last], next, curvb);
p[2].s = p[1].s;
p[2].x = p[1].x;
SetKickVertex(&p[5], gs.gsvertex[last], last, curvb);
p[5].s = p[0].s;
p[5].x = p[0].x;
curvb.dwCount += 2;
#ifdef PRIM_LOG
OUTPUT_VERT(PRIM_LOG, p[0], 0);
OUTPUT_VERT(PRIM_LOG, p[1], 1);
#endif
}
// Kick handler that intentionally does nothing; the only statement it ever had
// is the disabled log below, which suggests it is used for primitive types
// that are not rendered (presumably invalid ones - confirm against the kick table).
void ZeroGS::KickDummy()
{
//GREG_LOG("Kicking bad primitive: %.8x\n", *(u32*)prim);
}
// Stencil-only pass for the current batch: with color, depth test and depth
// writes disabled, the geometry is drawn once with an alpha test of
// "alpha >= 0xff", and STENCIL_FBA is written (within the STENCIL_CLEAR bits)
// for every pixel that passes. Afterwards the alpha test, color write mask,
// depth write and depth test states are restored from curvb / s_dwColorWrite.
//   curvb - vertex buffer/context whose test and zbuf registers are restored
__forceinline void ZeroGS::RenderFBA(const VB& curvb)
{
// add fba to all pixels
SETRS(D3DRS_STENCILWRITEMASK, STENCIL_CLEAR);
SETRS(D3DRS_STENCILPASS, D3DSTENCILOP_REPLACE);
SETRS(D3DRS_STENCILFAIL, D3DSTENCILOP_ZERO);
SETRS(D3DRS_STENCILFUNC, D3DCMP_ALWAYS);
SETRS(D3DRS_STENCILREF, STENCIL_FBA);
// no depth testing/writing and no color output during this pass
SETRS(D3DRS_ZENABLE, FALSE);
SETRS(D3DRS_ZWRITEENABLE, FALSE);
SETRS(D3DRS_COLORWRITEENABLE, 0);
// when depth is written through a second render target, disable that output too
if( s_bWriteDepth ) {
if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, 0);
else pd3dDevice->SetRenderTarget(1, NULL);
}
// NOTE(review): STENCILPASS was already set to REPLACE above; this repeats it
SETRS(D3DRS_STENCILPASS, D3DSTENCILOP_REPLACE);
// only pixels whose alpha is >= 0xff pass and get the stencil value
SETRS(D3DRS_ALPHATESTENABLE, TRUE);
SETRS(D3DRS_ALPHAFUNC, D3DCMP_GREATEREQUAL);
SETRS(D3DRS_ALPHAREF, 0xff);
DXVEC4 v;
v.x = 1; v.y = 2; v.z = 0; v.w = 0;
SETCONSTF(GPU_ONECOLOR, v);
DRAW();
// restore the alpha test requested by the context
if( !curvb.test.ate )
SETRS(D3DRS_ALPHATESTENABLE, FALSE);
else {
SETRS(D3DRS_ALPHAFUNC, g_dwAlphaCmp[curvb.test.atst]);
SETRS(D3DRS_ALPHAREF, b2XAlphaTest ? min(255,2 * curvb.test.aref) : curvb.test.aref);
}
// reset (not necessary)
SETRS(D3DRS_COLORWRITEENABLE, s_dwColorWrite);
SETRS(D3DRS_STENCILFAIL, D3DSTENCILOP_KEEP);
// depth writes (and the depth render target output) come back only if z is not masked
if( !curvb.zbuf.zmsk ) {
SETRS(D3DRS_ZWRITEENABLE, TRUE);
assert( curvb.pdepth != NULL );
if( s_bWriteDepth ) {
if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, 0xf);
else pd3dDevice->SetRenderTarget(1, curvb.pdepth->psurf);
}
}
SETRS(D3DRS_ZENABLE, curvb.test.zte);
}
// Stencil pre-pass that marks STENCIL_SPECIAL for the pixels of the current
// batch drawn with the alpha-test state left active by this function.
// Does nothing unless g_bUpdateStencil is set. Color and depth writes are
// disabled while drawing and restored afterwards; alpha testing is left
// disabled on exit.
//   curvb - vertex buffer/context whose test and zbuf registers are consulted
__forceinline void ZeroGS::RenderAlphaTest(const VB& curvb)
{
if( !g_bUpdateStencil ) return;
// with afail == 1 the alpha test is switched off for this pass
if( curvb.test.ate ) {
if( curvb.test.afail == 1 ) SETRS(D3DRS_ALPHATESTENABLE, FALSE);
}
SETRS(D3DRS_ZWRITEENABLE, FALSE);
SETRS(D3DRS_COLORWRITEENABLE, 0);
DXVEC4 v;
v.x = 1; v.y = 2; v.z = 0; v.w = 0;
SETCONSTF(GPU_ONECOLOR, v);
// when depth is written through a second render target, disable that output too
if( s_bWriteDepth ) {
if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, 0);
else pd3dDevice->SetRenderTarget(1,NULL);
}
// or a 1 to the stencil buffer wherever alpha passes
SETRS(D3DRS_STENCILPASS, D3DSTENCILOP_REPLACE);
SETRS(D3DRS_STENCILFUNC, D3DCMP_ALWAYS);
SETRS(D3DRS_STENCILENABLE, TRUE);
if( !s_bDestAlphaTest ) {
// clear everything
// (draws the batch once without alpha test, writing 0 to the STENCIL_CLEAR bits)
SETRS(D3DRS_STENCILREF, 0);
SETRS(D3DRS_STENCILWRITEMASK, STENCIL_CLEAR);
SETRS(D3DRS_ALPHATESTENABLE, FALSE);
DRAW();
if( curvb.test.ate && curvb.test.afail != 1 && USEALPHATESTING)
SETRS(D3DRS_ALPHATESTENABLE, TRUE);
}
// for alpha references above 0x80 the one-color constant and the reference are reprogrammed
if( curvb.test.ate && curvb.test.atst>1 && curvb.test.aref > 0x80) {
v.x = 1; v.y = 1; v.z = 0; v.w = 0;
SETCONSTF(GPU_ONECOLOR, v);
SETRS(D3DRS_ALPHAREF, curvb.test.aref);
}
// mark STENCIL_SPECIAL on every pixel drawn by this second pass
SETRS(D3DRS_STENCILREF, STENCIL_SPECIAL);
SETRS(D3DRS_STENCILWRITEMASK, STENCIL_SPECIAL);
SETRS(D3DRS_ZENABLE, FALSE);
DRAW();
// restore depth test, color writes and (if z is not masked) depth writes
if( curvb.test.zte ) SETRS(D3DRS_ZENABLE, TRUE);
SETRS(D3DRS_ALPHATESTENABLE, 0);
SETRS(D3DRS_COLORWRITEENABLE, s_dwColorWrite);
if( !curvb.zbuf.zmsk ) {
SETRS(D3DRS_ZWRITEENABLE, TRUE);
// set rt next level
if( s_bWriteDepth ) {
if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, 0xf);
else pd3dDevice->SetRenderTarget(1, curvb.pdepth->psurf);
}
}
}
// Programs the stencil test used while the batch itself is drawn.
// NOTE: this stencil hack for dest alpha testing ONLY works when the geometry
// in one DrawPrimitive call does not overlap.
// Every pixel that is drawn gets STENCIL_PIXELWRITE set (so that the stencil
// buffer can later be updated with new values). The stencil compare covers
// STENCIL_ALPHABIT when destination alpha testing (test.date) is on and
// STENCIL_SPECIAL when dwUsingSpecialTesting is non-zero; with neither, the
// test always passes. The chosen mask and reference are kept in
// dwStencilMask / dwStencilRef.
__forceinline void ZeroGS::RenderStencil(const VB& curvb, DWORD dwUsingSpecialTesting)
{
	const DWORD specialBit = dwUsingSpecialTesting ? STENCIL_SPECIAL : 0;

	SETRS(D3DRS_STENCILWRITEMASK, STENCIL_PIXELWRITE);

	dwStencilMask = (curvb.test.date?STENCIL_ALPHABIT:0) | specialBit;
	SETRS(D3DRS_STENCILMASK, dwStencilMask);

	SETRS(D3DRS_STENCILPASS, D3DSTENCILOP_REPLACE);
	SETRS(D3DRS_STENCILFUNC, dwStencilMask ? D3DCMP_EQUAL : D3DCMP_ALWAYS);

	// reference: required dest alpha bit (date*datm), the write marker and the special bit
	dwStencilRef = curvb.test.date*curvb.test.datm | STENCIL_PIXELWRITE | specialBit;
	SETRS(D3DRS_STENCILREF, dwStencilRef);
}
// Post-pass run after a batch was drawn (only valid when curvb.fba.fba is 0):
// re-draws the batch three times with color and depth output disabled to
// update STENCIL_ALPHABIT for the pixels marked during the draw and then to
// clear the per-flush marker bits (STENCIL_CLEAR). State is restored from
// curvb / s_dwColorWrite at the end.
//   curvb - vertex buffer/context whose test and zbuf registers are restored
__forceinline void ZeroGS::ProcessStencil(const VB& curvb)
{
assert( !curvb.fba.fba );
// set new alpha bit
// (pass 1: stencil == PIXELWRITE under the PIXELWRITE|FBA mask, writes the ALPHABIT bit)
SETRS(D3DRS_STENCILWRITEMASK, STENCIL_ALPHABIT);
SETRS(D3DRS_STENCILMASK, STENCIL_PIXELWRITE|STENCIL_FBA);
SETRS(D3DRS_STENCILPASS, D3DSTENCILOP_REPLACE);
SETRS(D3DRS_STENCILFUNC, D3DCMP_EQUAL);
SETRS(D3DRS_STENCILREF, STENCIL_PIXELWRITE);
SETRS(D3DRS_ZENABLE, FALSE);
SETRS(D3DRS_ZWRITEENABLE, FALSE);
SETRS(D3DRS_COLORWRITEENABLE, 0);
// when depth is written through a second render target, disable that output too
if( s_bWriteDepth ) {
if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, 0);
else pd3dDevice->SetRenderTarget(1, NULL);
}
SETRS(D3DRS_ALPHATESTENABLE, 0);
pd3dDevice->SetPixelShader(ppsOne);
DRAW();
// process when alpha >= 0xff
// (pass 2: same mask, reference now also carries FBA and ALPHABIT)
SETRS(D3DRS_STENCILREF, STENCIL_PIXELWRITE|STENCIL_FBA|STENCIL_ALPHABIT);
SETRS(D3DRS_STENCILFUNC, D3DCMP_EQUAL);
DRAW();
// clear STENCIL_PIXELWRITE bit
// (pass 3: unconditionally writes 0 to all STENCIL_CLEAR bits)
SETRS(D3DRS_STENCILWRITEMASK, STENCIL_CLEAR);
SETRS(D3DRS_STENCILPASS, D3DSTENCILOP_REPLACE);
SETRS(D3DRS_STENCILFUNC, D3DCMP_ALWAYS);
SETRS(D3DRS_STENCILREF, 0);
DRAW();
// restore state
SETRS(D3DRS_COLORWRITEENABLE, s_dwColorWrite);
if( curvb.test.ate && USEALPHATESTING)
SETRS(D3DRS_ALPHATESTENABLE, TRUE);
if( !curvb.zbuf.zmsk ) {
SETRS(D3DRS_ZWRITEENABLE, TRUE);
if( s_bWriteDepth ) {
assert( curvb.pdepth != NULL );
if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, 0xf);
else pd3dDevice->SetRenderTarget(1, curvb.pdepth->psurf);
}
}
SETRS(D3DRS_ZENABLE, curvb.test.zte);
SETRS(D3DRS_STENCILPASS, D3DSTENCILOP_KEEP);
}
// Post-pass that applies the FBA result to the alpha channel: for pixels whose
// stencil matches PIXELWRITE|FBA (under that mask) the batch is re-drawn with
// only the alpha channel writable and additive alpha blending (ONE + ONE),
// then the marker bits are cleared with a second draw. Skipped entirely when
// the top bit of the frame mask (fbm & 0x80000000) is set. Alpha test, alpha
// blend, color write and depth states are restored at the end.
//   curvb - vertex buffer/context whose frame, test and zbuf registers are used
__forceinline void ZeroGS::ProcessFBA(const VB& curvb)
{
if( (curvb.frame.fbm&0x80000000) ) return;
// add fba to all pixels that were written and alpha was less than 0xff
SETRS(D3DRS_STENCILWRITEMASK, STENCIL_ALPHABIT);
SETRS(D3DRS_STENCILMASK, STENCIL_PIXELWRITE|STENCIL_FBA);
SETRS(D3DRS_STENCILPASS, D3DSTENCILOP_REPLACE);
SETRS(D3DRS_STENCILFUNC, D3DCMP_EQUAL);
SETRS(D3DRS_STENCILREF, STENCIL_FBA|STENCIL_PIXELWRITE|STENCIL_ALPHABIT);
SETRS(D3DRS_ZENABLE, FALSE);
SETRS(D3DRS_ZWRITEENABLE, FALSE);
// only the alpha channel of the color target may change
SETRS(D3DRS_COLORWRITEENABLE, D3DCOLORWRITEENABLE_ALPHA);
if( s_bWriteDepth ) {
if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, 0);
else pd3dDevice->SetRenderTarget(1, NULL);
}
// processes the pixels with ALPHA < 0x80*2
SETRS(D3DRS_ALPHATESTENABLE, TRUE);
SETRS(D3DRS_ALPHAFUNC, D3DCMP_LESSEQUAL);
SETRS(D3DRS_ALPHAREF, 0xff);
// add 1 to dest
SETRS(D3DRS_SRCBLENDALPHA, D3DBLEND_ONE);
SETRS(D3DRS_DESTBLENDALPHA, D3DBLEND_ONE);
SETRS(D3DRS_BLENDOPALPHA, D3DBLENDOP_ADD);
// NOTE(review): every other SETCONSTF(GPU_ONECOLOR, ...) in this file passes a
// DXVEC4; here the address of a single float is passed. If SETCONSTF uploads a
// full 4-float register this reads past `f` - confirm against the macro.
float f = 1;
SETCONSTF(GPU_ONECOLOR, &f);
pd3dDevice->SetPixelShader(ppsOne);
DRAW();
SETRS(D3DRS_ALPHATESTENABLE, FALSE);
// reset bits
// (second draw: where 0 > (stencil & (PIXELWRITE|FBA)) fails the stencil is kept,
//  where it passes the STENCIL_CLEAR bits are zeroed)
SETRS(D3DRS_STENCILWRITEMASK, STENCIL_CLEAR);
SETRS(D3DRS_STENCILMASK, STENCIL_PIXELWRITE|STENCIL_FBA);
SETRS(D3DRS_STENCILPASS, D3DSTENCILOP_ZERO);
SETRS(D3DRS_STENCILFAIL, D3DSTENCILOP_KEEP);
SETRS(D3DRS_STENCILFUNC, D3DCMP_GREATER);
SETRS(D3DRS_STENCILREF, 0);
DRAW();
// NOTE(review): the sibling functions test curvb.test.ate before re-enabling the
// alpha test; this one tests curvb.test.atst - confirm this is intended.
if( curvb.test.atst && USEALPHATESTING) {
SETRS(D3DRS_ALPHATESTENABLE, TRUE);
SETRS(D3DRS_ALPHAFUNC, g_dwAlphaCmp[curvb.test.atst]);
SETRS(D3DRS_ALPHAREF, b2XAlphaTest ? min(255,2 * curvb.test.aref) : curvb.test.aref);
}
// restore (SetAlphaVariables)
if( !bNeedAlphaColor ) SETRS(D3DRS_SRCBLENDALPHA, D3DBLEND_ONE);//(bNeedBlendFactorInAlpha ? D3DBLEND_ZERO : D3DBLEND_ONE));
SETRS(D3DRS_DESTBLENDALPHA, D3DBLEND_ZERO);//bNeedBlendFactorInAlpha ? D3DBLEND_ONE : D3DBLEND_ZERO);
if(bNeedAlphaColor && vAlphaBlendColor.y<0) SETRS(D3DRS_BLENDOPALPHA, D3DBLENDOP_REVSUBTRACT);
// reset (not necessary)
SETRS(D3DRS_COLORWRITEENABLE, s_dwColorWrite);
if( !curvb.zbuf.zmsk ) {
SETRS(D3DRS_ZWRITEENABLE, TRUE);
if( s_bWriteDepth ) {
if( bIndepWriteMasks ) SETRS(D3DRS_COLORWRITEENABLE1, 0xf);
else pd3dDevice->SetRenderTarget(1, curvb.pdepth->psurf);
}
}
SETRS(D3DRS_ZENABLE, curvb.test.zte);
}
// Makes sure the given context has a color render target (prndr) and a depth
// target (pdepth), brings both up to date and binds them on the device.
// Targets are looked up through s_RTs / s_DepthRTs when the cached pointers
// are NULL. On exit bVarsSetTarg is TRUE for this context, and is cleared for
// the other context when that one uses a different render target.
//   context - index into vb[] (0 or 1)
inline void ZeroGS::SetContextTarget(int context)
{
VB& curvb = vb[context];
if( curvb.prndr == NULL )
curvb.prndr = s_RTs.GetTarg(curvb.frame, 0, GET_MAXHEIGHT(curvb.gsfb.fbp, curvb.gsfb.fbw, curvb.gsfb.psm));
// make sure targets are valid
if( curvb.pdepth == NULL ) {
// describe the depth buffer with the zbuf address/format and the color target's width/height
frameInfo f;
f.fbp = curvb.zbuf.zbp;
f.fbw = curvb.frame.fbw;
f.fbh = curvb.prndr->fbh;
f.psm = curvb.zbuf.psm;
f.fbm = 0;
// a masked z buffer (zmsk) is requested as a virtual target
curvb.pdepth = (CDepthTarget*)s_DepthRTs.GetTarg(f, CRenderTargetMngr::TO_DepthBuffer|CRenderTargetMngr::TO_StrictHeight|
(curvb.zbuf.zmsk?CRenderTargetMngr::TO_Virtual:0), GET_MAXHEIGHT(curvb.zbuf.zbp, curvb.gsfb.fbw, 0));
}
assert( curvb.prndr != NULL && curvb.pdepth != NULL );
assert( curvb.pdepth->fbh == curvb.prndr->targheight );
// (disabled: re-creating the depth target when its height differs from the render target)
// if( curvb.pdepth->fbh != curvb.prndr->fbh ) {
//
// s_DepthRTs.DestroyTarg(curvb.pdepth);
// ERROR_LOG("ZeroGS: render and depth heights different: %x %x\n", curvb.prndr->fbh, curvb.pdepth->fbh);
// frameInfo f;
// f.fbp = curvb.zbuf.zbp;
// f.fbw = curvb.frame.fbw;
// f.fbh = curvb.prndr->fbh;
// f.psm = curvb.zbuf.psm;
// f.fbm = 0;
// curvb.pdepth = (CDepthTarget*)s_DepthRTs.GetTarg(f, CRenderTargetMngr::TO_DepthBuffer|CRenderTargetMngr::TO_StrictHeight|
// (curvb.zbuf.zmsk?CRenderTargetMngr::TO_Virtual:0), GET_MAXHEIGHT(curvb.zbuf.zbp, curvb.gsfb.fbw, 0));
// }
// a virtual depth target is promoted once z writes are enabled; while z stays
// masked its pending-update flag is simply dropped
if( curvb.pdepth->status & CRenderTarget::TS_Virtual) {
if( !curvb.zbuf.zmsk ) {
CRenderTarget* ptemp = s_DepthRTs.Promote(curvb.pdepth->fbp|(curvb.pdepth->fbw<<16));
assert( ptemp == curvb.pdepth );
}
else
curvb.pdepth->status &= ~CRenderTarget::TS_NeedUpdate;
}
// bSetTarg stays 1 only when the depth target did not go through Update() below
BOOL bSetTarg = 1;
if( curvb.pdepth->status & CRenderTarget::TS_NeedUpdate ) {
assert( !(curvb.pdepth->status & CRenderTarget::TS_Virtual) );
// don't update if virtual
curvb.pdepth->Update(context, curvb.prndr);
bSetTarg = 0;
}
if( curvb.prndr->status & CRenderTarget::TS_NeedUpdate ) {
// bind the depth surfaces first (unless the depth Update above ran), then refresh the color target
if( s_bWriteDepth ) {
if( bSetTarg ) {
pd3dDevice->SetRenderTarget(1, curvb.pdepth->psurf);
pd3dDevice->SetDepthStencilSurface(curvb.pdepth->pdepth);
}
}
else if( bSetTarg )
pd3dDevice->SetDepthStencilSurface(curvb.pdepth->pdepth);
curvb.prndr->Update(context, curvb.pdepth);
// note, targ already set
}
else {
// color target is current: bind it together with the depth target
//if( (vb[0].prndr != vb[1].prndr && vb[!context].bVarsSetTarg) || !vb[context].bVarsSetTarg )
pd3dDevice->SetRenderTarget(0, curvb.prndr->psurf);
//if( bSetTarg && ((vb[0].pdepth != vb[1].pdepth && vb[!context].bVarsSetTarg) || !vb[context].bVarsSetTarg) )
curvb.pdepth->SetDepthTarget();
// unbind the target's texture from the memory samplers if it is currently set there
if( s_ptexCurSet[0] == curvb.prndr->ptex ) {
s_ptexCurSet[0] = NULL;
pd3dDevice->SetTexture(SAMP_MEMORY0, NULL);
}
if( s_ptexCurSet[1] == curvb.prndr->ptex ) {
s_ptexCurSet[1] = NULL;
pd3dDevice->SetTexture(SAMP_MEMORY1, NULL);
}
curvb.prndr->SetViewport();
}
curvb.prndr->SetTarget(curvb.frame.fbp, curvb.scissor, context);
// warn when the z buffer offset inside its target differs from the frame offset inside its target
if( (curvb.zbuf.zbp-curvb.pdepth->fbp) != (curvb.frame.fbp - curvb.prndr->fbp) && curvb.test.zte ) {
WARN_LOG("frame and zbuf not aligned\n");
}
curvb.bVarsSetTarg = TRUE;
if( vb[!context].prndr != curvb.prndr )
vb[!context].bVarsSetTarg = FALSE;
assert( !(curvb.prndr->status&CRenderTarget::TS_NeedUpdate) );
assert( curvb.pdepth == NULL || !(curvb.pdepth->status&CRenderTarget::TS_NeedUpdate) );
}
// Uploads the per-context texture shader constants that depend on TEX0, TEXA
// and CLAMP: the texture function/alpha constants (GPU_TEXALPHA0,
// GPU_TEXALPHA20, GPU_TESTBLACK0) and the wrap/clamp constants
// (GPU_TEXWRAPMODE0, GPU_CLAMPEXTS0). Nothing is done when texturing is
// disabled for the context's current primitive. The constants are only
// recomputed while bTexConstsSync is FALSE. Finally, a pending texture flush
// (s_bTexFlush) is handled: CLUT formats reload the CLUT through
// texClutWrite, other formats just clear the flag.
//   context - index into vb[] (0 or 1); also added to the constant ids
void ZeroGS::SetTexVariables(int context)
{
if( !vb[context].curprim.tme ) {
return;
}
assert( !vb[context].bNeedTexCheck );
// NOTE(review): v, v2, fw and fh declared here are never read; the clamp
// section below declares its own variables with the same names
DXVEC4 v, v2;
tex0Info& tex0 = vb[context].tex0;
float fw = (float)tex0.tw;
float fh = (float)tex0.th;
if( !vb[context].bTexConstsSync ) {
// alpha and texture highlighting
DXVEC4 valpha, valpha2;
// if clut, use the frame format
int psm = tex0.psm;
if( PSMT_ISCLUT(tex0.psm) ) psm = tex0.cpsm;
// formats 1, 2 and 10 take their alpha from TEXA (see the fta[] use below)
int nNeedAlpha = (psm == 1 || psm == 2 || psm == 10);
DXVEC4 vblack;
vblack.x = vblack.y = vblack.z = vblack.w = 10;
// per texture function (tfx 0..3): constants consumed by the pixel shader
switch(tex0.tfx) {
case 0:
valpha.z = 0; valpha.w = 0;
valpha2.x = 0; valpha2.y = 0;
valpha2.z = 2; valpha2.w = 1;
break;
case 1:
valpha.z = 0; valpha.w = 1;
valpha2.x = 1; valpha2.y = 0;
valpha2.z = 0; valpha2.w = 0;
break;
case 2:
valpha.z = 1; valpha.w = 1.0f;
valpha2.x = 0; valpha2.y = tex0.tcc ? 1.0f : 0.0f;
valpha2.z = 2; valpha2.w = 0;
break;
case 3:
valpha.z = 1; valpha.w = tex0.tcc ? 0.0f : 1.0f;
valpha2.x = 0; valpha2.y = tex0.tcc ? 1.0f : 0.0f;
valpha2.z = 2; valpha2.w = 0;
break;
default: __assume(0);
}
if( tex0.tcc ) {
// tfx 1 with tcc overrides the values chosen in the switch above
if( tex0.tfx == 1 ) {
//mode.x = 10;
valpha.z = 0; valpha.w = 0;
valpha2.x = 1; valpha2.y = 1;
valpha2.z = 0; valpha2.w = 0;
}
if( nNeedAlpha ) {
// valpha.x holds ta[0]; valpha.y holds the difference to the second alpha value
if( tex0.tfx == 0 ) {
// make sure alpha is mult by two when the output is Cv = Ct*Cf
valpha.x = 2*gs.texa.fta[0];
// if 24bit, always choose ta[0]
valpha.y = 2*gs.texa.fta[psm != 1];
valpha.y -= valpha.x;
}
else {
valpha.x = gs.texa.fta[0];
// if 24bit, always choose ta[0]
valpha.y = gs.texa.fta[psm != 1];
valpha.y -= valpha.x;
}
// need black detection
if( gs.texa.aem && psm == PSMCT24 )
vblack.w = 0;
}
else {
if( tex0.tfx == 0 ) {
valpha.x = 0;
valpha.y = 2;
}
else {
valpha.x = 0;
valpha.y = 1;
}
}
}
else {
// reset alpha to color
valpha.x = valpha.y = 0;
valpha.w = 1;
}
SETCONSTF(GPU_TEXALPHA0+context, &valpha);
SETCONSTF(GPU_TEXALPHA20+context, &valpha2);
// the black-test constant is only uploaded for tcc + aem with 24/16 bit formats
if( tex0.tcc && gs.texa.aem && (psm == PSMCT24 || psm == PSMCT16 || psm == PSMCT16S) )
SETCONSTF(GPU_TESTBLACK0+context, &vblack);
// clamp relies on texture width
{
clampInfo* pclamp = &ZeroGS::vb[context].clamp;
// v feeds GPU_TEXWRAPMODE0, v2 feeds GPU_CLAMPEXTS0.
// NOTE(review): v.z / v.w are only assigned in the "region rep" cases (wms/wmt == 3)
DXVEC4 v, v2;
v.x = v.y = 0;
LPD3DTEX* ptex = ZeroGS::vb[context].ptexClamp;
ptex[0] = ptex[1] = NULL;
float fw = ZeroGS::vb[context].tex0.tw;
float fh = ZeroGS::vb[context].tex0.th;
// horizontal wrap mode
switch(pclamp->wms) {
case 0:
v2.x = -1e10; v2.z = 1e10;
break;
case 1: // pclamp
// suikoden5 movie text
v2.x = 0; v2.z = 1-0.5f/fw;
break;
case 2: // reg pclamp
v2.x = (pclamp->minu+0.5f)/fw; v2.z = (pclamp->maxu-0.5f)/fw;
break;
case 3: // region rep x
v.x = 0.9999f;
v.z = fw / (float)GPU_TEXMASKWIDTH;
v2.x = (float)GPU_TEXMASKWIDTH / fw;
v2.z = pclamp->maxu / fw;
// fetch a bitwise texture for minu only when the value changed since the last call
if( pclamp->minu != g_PrevBitwiseTexX ) {
g_PrevBitwiseTexX = pclamp->minu;
ptex[0] = ZeroGS::s_BitwiseTextures.GetTex(pclamp->minu, NULL);
}
break;
default: __assume(0);
}
// vertical wrap mode (mirrors the horizontal cases)
switch(pclamp->wmt) {
case 0:
v2.y = -1e10; v2.w = 1e10;
break;
case 1: // pclamp
// suikoden5 movie text
v2.y = 0; v2.w = 1-0.5f/fh;
break;
case 2: // reg pclamp
v2.y = (pclamp->minv+0.5f)/fh; v2.w = (pclamp->maxv-0.5f)/fh;
break;
case 3: // region rep y
v.y = 0.9999f;
v.w = fh / (float)GPU_TEXMASKWIDTH;
v2.y = (float)GPU_TEXMASKWIDTH / fh;
v2.w = pclamp->maxv / fh;
if( pclamp->minv != g_PrevBitwiseTexY ) {
g_PrevBitwiseTexY = pclamp->minv;
ptex[1] = ZeroGS::s_BitwiseTextures.GetTex(pclamp->minv, ptex[0]);
}
break;
default: __assume(0);
}
SETCONSTF(GPU_TEXWRAPMODE0+context, v);
SETCONSTF(GPU_CLAMPEXTS0+context, v2);
}
vb[context].bTexConstsSync = TRUE;
}
// pending texture flush: CLUT formats reload the CLUT, others only clear the flag
if(s_bTexFlush ) {
if( PSMT_ISCLUT(tex0.psm) )
texClutWrite(context);
else
s_bTexFlush = FALSE;
}
}
// Uploads the shader constants needed to sample a texture straight out of a
// memory target (GPU_REALTEXDIMS0, GPU_TEXDIMS0, GPU_TEXBLOCK0,
// GPU_TEXOFFSET0, GPU_PAGEOFFSET0, each offset by `context`) and records the
// memory target's texture for the context.
//   context  - index into vb[] (0 or 1); also added to the constant/sampler ids
//   bilinear - >1 forces the bilinear constants; 1 uses them only when
//              conf.bilinear is set; 0 never
//   tex0     - texture description (psm, tw, th, tbw, tbp0 are read)
//   pmemtarg - memory target holding the texture data; must not be NULL
//   force    - non-zero binds the texture to the sampler immediately, zero only
//              stores it in s_ptexNextSet
// Returns early (after GPU_REALTEXDIMS0 may already have been set) when
// tex0.psm has no entry in m_Blocks.
void ZeroGS::SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, CMemoryTarget* pmemtarg, int force)
{
DXVEC4 v;
assert( pmemtarg != NULL );
float fw = (float)tex0.tw;
float fh = (float)tex0.th;
// real texture size and its reciprocal, only needed for bilinear filtering
if( bilinear > 1 || (bilinear && conf.bilinear)) {
v.x = (float)fw;
v.y = (float)fh;
v.z = 1.0f / (float)fw;
v.w = 1.0f / (float)fh;
SETCONSTF(GPU_REALTEXDIMS0+context, v);
}
// a bpp of 0 marks a pixel format without block information
if( m_Blocks[tex0.psm].bpp == 0 ) {
ERROR_LOG("ZeroGS: Undefined tex psm 0x%x!\n", tex0.psm);
return;
}
const BLOCK& b = m_Blocks[tex0.psm];
float fbw = (float)tex0.tbw;
// texture dimensions in block-table units plus small offsets in z/w
v.x = b.vTexDims.x * fw;
v.y = b.vTexDims.y * fh;
v.z = (float)BLOCK_TEXWIDTH*(0.002f / 64.0f + 0.01f/128.0f);
v.w = (float)BLOCK_TEXHEIGHT*0.2f/512.0f;
if( bilinear > 1 || (conf.bilinear && bilinear) ) {
v.x *= 1/128.0f;
v.y *= 1/512.0f;
v.z *= 1/128.0f;
v.w *= 1/512.0f;
}
SETCONSTF(GPU_TEXDIMS0+context, v);
// effective (inverse) width of the memory texture, scaled by the target's widthmult
float g_fitexwidth = g_fiGPU_TEXWIDTH/(float)pmemtarg->widthmult;
float g_texwidth = GPU_TEXWIDTH*(float)pmemtarg->widthmult;
SETCONSTF(GPU_TEXBLOCK0+context, &b.vTexBlock.x);
// row of the memory texture at which the texture base pointer starts
float fpage = tex0.tbp0*(64.0f*g_fitexwidth) + 0.05f * g_fitexwidth;
float fpageint = floorf(fpage);
// NOTE(review): starttbp is not used below
int starttbp = (int)fpage;
// 2048 is number of words to span one page
float fblockstride = (2048.0f /(float)(g_texwidth*BLOCK_TEXWIDTH)) * b.vTexDims.x * fbw;
assert( fblockstride >= 1.0f );
v.x = (float)(2048 * g_fitexwidth);
v.y = fblockstride;
v.z = g_fBlockMult/(float)pmemtarg->widthmult;
v.w = fpage-fpageint;
if( g_fBlockMult > 1 ) {
// make sure to divide by mult (since the G16R16 texture loses info)
v.z *= b.bpp * (1/32.0f);
}
SETCONSTF(GPU_TEXOFFSET0+context, v);
// y: reciprocal of the target height in rows (rounded up); x: start row relative to the target
v.y = (float)1.0f / (float)((pmemtarg->realheight+pmemtarg->widthmult-1)/pmemtarg->widthmult);
v.x = (fpageint-(float)pmemtarg->realy/(float)pmemtarg->widthmult+0.5f)*v.y;
SETCONSTF(GPU_PAGEOFFSET0+context, v);
// bind now, or remember the texture for a later bind
if( force ) {
pd3dDevice->SetTexture(SAMP_MEMORY0+context, pmemtarg->ptex);
s_ptexCurSet[context] = pmemtarg->ptex;
}
else s_ptexNextSet[context] = pmemtarg->ptex;
vb[context].pmemtarg = pmemtarg;
vb[context].bVarsTexSync = FALSE;
}
// Chooses how the blend coefficient C of the alpha register `a` is supplied.
// Assumes the texture factor is unused.
//   sign - non-zero for a positive factor, zero for a negated one
// Expands names from the call site: a, usec, icurctx, vb, bNeedAlphaColor,
// bDestAlphaColor, bNeedBlendFactorInAlpha, vAlphaBlendColor and SETRS.
//   a.c == 0: vAlphaBlendColor.y carries the (signed) scale; the alpha blend op
//             follows the sign
//   a.c == 1: bDestAlphaColor is derived from the low nibble of the render
//             target format (nibble 1 -> 2; other non-zero nibbles leave it
//             untouched)
//   a.c == 2: the fixed value a.fix is placed in vAlphaBlendColor.w and usec is
//             reset to 0 so that alpha comes from the source
// The macro ends on the closing brace: it must not carry a trailing line
// continuation, otherwise the following source lines become part of it.
#define SET_ALPHA_COLOR_FACTOR(sign) { \
	switch(a.c) { \
		case 0: \
			bNeedAlphaColor = 1; \
			vAlphaBlendColor.y = (sign) ? 2.0f*255.0f/256.0f : -2.0f*255.0f/256.0f; \
			SETRS(D3DRS_SRCBLENDALPHA, D3DBLEND_ONE); \
			SETRS(D3DRS_BLENDOPALPHA, (sign) ? D3DBLENDOP_ADD : D3DBLENDOP_REVSUBTRACT); \
			break; \
		case 1: \
			/* if in 24 bit mode, dest alpha should be one */ \
			switch(vb[icurctx].prndr->psm&0xf) { \
				case 0: \
					bDestAlphaColor = (a.d!=2)&&((a.a==a.d)||(a.b==a.d)); \
					break; \
				case 1: \
					/* dest alpha should be one */ \
					bDestAlphaColor = 2; \
					break; \
				/* default: 16bit surface, so returned alpha is ok */ \
			} \
			break; \
		\
		case 2: \
			bNeedBlendFactorInAlpha = 1; /* should disable alpha channel writing */ \
			bNeedAlphaColor = 1; \
			vAlphaBlendColor.y = 0; \
			vAlphaBlendColor.w = (sign) ? (float)a.fix * (2.0f/255.0f) : (float)a.fix * (-2.0f/255.0f); \
			usec = 0; /* change so that alpha comes from source*/ \
			break; \
	} \
}
// Disabled alternative for the fixed-value case that programmed
// D3DRS_BLENDFACTOR directly (kept for reference):
//if( a.fix <= 0x80 ) {
// dwTemp = (a.fix*2)>255?255:(a.fix*2);
// dwTemp = dwTemp|(dwTemp<<8)|(dwTemp<<16)|0x80000000;
// DEBUG_LOG("bfactor: %8.8x\n", dwTemp);
// SETRS(D3DRS_BLENDFACTOR, dwTemp);
// }
// else {
// Invalidates the cached alpha-blend configuration: with s_bAlphaSet cleared,
// the next SetAlphaVariables call cannot take its "unchanged" early return.
void ZeroGS::ResetAlphaVariables()
{
	s_bAlphaSet = FALSE;
}
// Translates the GS alpha-blend register `a` into Direct3D blend render states
// (blend op, source and destination factors) and into the helper flags
// consumed elsewhere (bNeedAlphaColor, bDestAlphaColor,
// bNeedBlendFactorInAlpha, bAlphaClamping, vAlphaBlendColor).
// The code below treats the value 2 of a.a / a.b / a.d as "zero" (see the
// inline comments) and a.c as the selector of the blend coefficient; the
// branches distinguish which of the terms are zero.
// Returns early, after re-asserting ALPHABLENDENABLE, when s_bAlphaSet is set
// and `a` matches the cached s_alphaInfo (abcd and fix).
// NOTE(review): s_bAlphaSet is never set to TRUE inside this function.
void ZeroGS::SetAlphaVariables(const alphaInfo& a)
{
SETRS(D3DRS_ALPHABLENDENABLE, USEALPHABLENDING); // always set
if( s_bAlphaSet && a.abcd == s_alphaInfo.abcd && a.fix == s_alphaInfo.fix ) {
return;
}
// TODO: negative color when not clamping turns to positive???
g_vars._bAlphaState = 0; // set all to zero
bNeedBlendFactorInAlpha = 0;
b2XAlphaTest = 1;
// NOTE(review): dwTemp is only referenced by disabled code
DWORD dwTemp = 0xffffffff;
// default
SETRS(D3DRS_SRCBLENDALPHA, D3DBLEND_ONE);
SETRS(D3DRS_DESTBLENDALPHA, D3DBLEND_ZERO);
SETRS(D3DRS_BLENDOPALPHA, D3DBLENDOP_ADD);
s_alphaInfo = a;
vAlphaBlendColor = DXVEC4(1,2*255.0f/256.0f,0,0);
// usec selects the entry of blendalpha[] / blendinvalpha[]; SET_ALPHA_COLOR_FACTOR may reset it to 0
DWORD usec = a.c;
if( a.a == a.b ) { // just d remains
SETRS(D3DRS_ALPHABLENDENABLE, USEALPHABLENDING);
if( a.d == 0 ) {
// result is the source color: blending can be switched off
SETRS(D3DRS_ALPHABLENDENABLE, 0);
}
else {
// result is either the destination color (d == 1) or zero
SETRS(D3DRS_DESTBLEND, a.d == 1 ? D3DBLEND_ONE : D3DBLEND_ZERO);
SETRS(D3DRS_SRCBLEND, D3DBLEND_ZERO);
SETRS(D3DRS_BLENDOP, D3DBLENDOP_ADD);
}
goto EndSetAlpha;
}
else if( a.d == 2 ) { // zero
if( a.a == 2 ) {
// zero all color
SETRS(D3DRS_SRCBLEND, D3DBLEND_ZERO);
SETRS(D3DRS_DESTBLEND, D3DBLEND_ZERO);
goto EndSetAlpha;
}
else if( a.b == 2 ) {
// only the A*C term remains
//b2XAlphaTest = 1;
SET_ALPHA_COLOR_FACTOR(1);
if( bDestAlphaColor == 2 ) {
SETRS(D3DRS_BLENDOP, D3DBLENDOP_ADD);
SETRS(D3DRS_SRCBLEND, a.a == 0 ? D3DBLEND_ONE : D3DBLEND_ZERO);
SETRS(D3DRS_DESTBLEND, a.a == 0 ? D3DBLEND_ZERO : D3DBLEND_ONE);
}
else {
if( bNeedAlphaColor ) bAlphaClamping = 2;
SETRS(D3DRS_BLENDOP, D3DBLENDOP_ADD);
SETRS(D3DRS_SRCBLEND, a.a == 0 ? blendalpha[usec] : D3DBLEND_ZERO);
SETRS(D3DRS_DESTBLEND, a.a == 0 ? D3DBLEND_ZERO : blendalpha[usec]);
}
goto EndSetAlpha;
}
// nothing is zero, so must do some real blending
//b2XAlphaTest = 1;
bAlphaClamping = 3;
SET_ALPHA_COLOR_FACTOR(1);
SETRS(D3DRS_BLENDOP, a.a == 0 ? D3DBLENDOP_SUBTRACT : D3DBLENDOP_REVSUBTRACT);
SETRS(D3DRS_SRCBLEND, bDestAlphaColor == 2 ? D3DBLEND_ONE : blendalpha[usec]);
SETRS(D3DRS_DESTBLEND, bDestAlphaColor == 2 ? D3DBLEND_ONE : blendalpha[usec]);
}
else if( a.a == 2 ) { // zero
//b2XAlphaTest = 1;
bAlphaClamping = 1; // min testing
SET_ALPHA_COLOR_FACTOR(1);
if( a.b == a.d ) {
// can get away with 1-A
SETRS(D3DRS_BLENDOP, D3DBLENDOP_ADD);
SETRS(D3DRS_SRCBLEND, (a.b == 0 && bDestAlphaColor != 2) ? blendinvalpha[usec] : D3DBLEND_ZERO);
SETRS(D3DRS_DESTBLEND, (a.b == 0 || bDestAlphaColor == 2) ? D3DBLEND_ZERO : blendinvalpha[usec]);
}
else {
SETRS(D3DRS_BLENDOP, a.b==0 ? D3DBLENDOP_REVSUBTRACT : D3DBLENDOP_SUBTRACT);
SETRS(D3DRS_SRCBLEND, (a.b == 0 && bDestAlphaColor != 2) ? blendalpha[usec] : D3DBLEND_ONE);
SETRS(D3DRS_DESTBLEND, (a.b == 0 || bDestAlphaColor == 2 ) ? D3DBLEND_ONE : blendalpha[usec]);
}
}
else if( a.b == 2 ) {
bAlphaClamping = 2; // max testing
// the factor is negated when a.a equals a.d
SET_ALPHA_COLOR_FACTOR(a.a!=a.d);
if( a.a == a.d ) {
// can get away with 1+A, but need to set alpha to negative
SETRS(D3DRS_BLENDOP, D3DBLENDOP_ADD);
if( bDestAlphaColor == 2 ) {
assert(usec==1);
// all ones
bNeedAlphaColor = 1;
bNeedBlendFactorInAlpha = 1;
vAlphaBlendColor.y = 0;
vAlphaBlendColor.w = -1;
SETRS(D3DRS_SRCBLEND, a.a == 0 ? D3DBLEND_INVSRCALPHA : D3DBLEND_ZERO);
SETRS(D3DRS_DESTBLEND, a.a == 0 ? D3DBLEND_ZERO : D3DBLEND_INVSRCALPHA);
}
else {
SETRS(D3DRS_SRCBLEND, a.a == 0 ? blendinvalpha[usec] : D3DBLEND_ZERO);
SETRS(D3DRS_DESTBLEND, a.a == 0 ? D3DBLEND_ZERO : blendinvalpha[usec]);
}
}
else {
//b2XAlphaTest = 1;
SETRS(D3DRS_BLENDOP, D3DBLENDOP_ADD);
SETRS(D3DRS_SRCBLEND, (a.a == 0 && bDestAlphaColor != 2) ? blendalpha[usec] : D3DBLEND_ONE);
SETRS(D3DRS_DESTBLEND, (a.a == 0 || bDestAlphaColor == 2) ? D3DBLEND_ONE : blendalpha[usec]);
}
}
else {
// all 3 components are valid!
bAlphaClamping = 3; // all testing
SET_ALPHA_COLOR_FACTOR(a.a!=a.d);
if( a.a == a.d ) {
// can get away with 1+A, but need to set alpha to negative
SETRS(D3DRS_BLENDOP, D3DBLENDOP_ADD);
if( bDestAlphaColor == 2 ) {
assert(usec==1);
// all ones
bNeedAlphaColor = 1;
bNeedBlendFactorInAlpha = 1;
vAlphaBlendColor.y = 0;
vAlphaBlendColor.w = -1;
SETRS(D3DRS_SRCBLEND, a.a == 0 ? D3DBLEND_INVSRCALPHA : D3DBLEND_SRCALPHA);
SETRS(D3DRS_DESTBLEND, a.a == 0 ? D3DBLEND_SRCALPHA : D3DBLEND_INVSRCALPHA);
}
else {
SETRS(D3DRS_SRCBLEND, a.a == 0 ? blendinvalpha[usec] : blendalpha[usec]);
SETRS(D3DRS_DESTBLEND, a.a == 0 ? blendalpha[usec] : blendinvalpha[usec]);
}
}
else {
// here b equals d, so the result is a plain interpolation between A and B
assert(a.b == a.d);
SETRS(D3DRS_BLENDOP, D3DBLENDOP_ADD);
if( bDestAlphaColor == 2 ) {
assert(usec==1);
// all ones
bNeedAlphaColor = 1;
bNeedBlendFactorInAlpha = 1;
vAlphaBlendColor.y = 0;
vAlphaBlendColor.w = 1;
SETRS(D3DRS_SRCBLEND, a.a != 0 ? D3DBLEND_INVSRCALPHA : D3DBLEND_SRCALPHA);
SETRS(D3DRS_DESTBLEND, a.a != 0 ? D3DBLEND_SRCALPHA : D3DBLEND_INVSRCALPHA);
}
else {
//b2XAlphaTest = 1;
SETRS(D3DRS_SRCBLEND, a.a != 0 ? blendinvalpha[usec] : blendalpha[usec]);
SETRS(D3DRS_DESTBLEND, a.a != 0 ? blendalpha[usec] : blendinvalpha[usec]);
}
}
}
EndSetAlpha:
//b2XAlphaTest = b2XAlphaTest && bNeedAlphaColor && !bNeedBlendFactorInAlpha;
INC_ALPHAVARS();
}
// Requests depth output through the extra render target. Ignored unless
// conf.mrtdepth is enabled; otherwise turns s_bWriteDepth on and reloads its
// countdown (s_nWriteDepthCount) with 4.
void ZeroGS::SetWriteDepth()
{
	if( !conf.mrtdepth )
		return;

	s_bWriteDepth = TRUE;
	s_nWriteDepthCount = 4;
}
// Reports whether depth output through the extra render target is currently
// active (the s_bWriteDepth flag).
BOOL ZeroGS::IsWriteDepth()
{
	return s_bWriteDepth;
}
// Reports whether destination alpha testing is currently active, i.e. the
// s_bDestAlphaTest flag that SetDestAlphaTest() turns on.
// (Previously this returned s_bWriteDepth, duplicating IsWriteDepth().)
BOOL ZeroGS::IsWriteDestAlphaTest()
{
	return s_bDestAlphaTest;
}
// Turns destination alpha testing on and reloads its countdown
// (s_nWriteDestAlphaTest) with 4.
void ZeroGS::SetDestAlphaTest()
{
	s_nWriteDestAlphaTest = 4;
	s_bDestAlphaTest = TRUE;
}
// Stores a new fog color, flushes both contexts and, unless the device is
// lost, uploads the color to the GPU_FOGCOLOR shader constant right away.
//   fog - packed color; bits 0-7, 8-15 and 16-23 become x, y and z scaled to 0..1
// The update is unconditional: the original "changed?" comparison was
// short-circuited with a leading `1||`.
void ZeroGS::SetFogColor(u32 fog)
{
	gs.fogcol = fog;

	ZeroGS::Flush(0);
	ZeroGS::Flush(1);

	if( g_bIsLost )
		return;

	// set it immediately
	DXVEC4 v;
	v.x = (gs.fogcol&0xff)/255.0f;
	v.y = ((gs.fogcol>>8)&0xff)/255.0f;
	v.z = ((gs.fogcol>>16)&0xff)/255.0f;
	SETCONSTF(GPU_FOGCOLOR, v);
}
// Placeholder for the EXTWRITE feature: currently it only logs a warning.
// The disabled code below is the beginning of an implementation that would
// compute the memory range of the target buffer.
void ZeroGS::ExtWrite()
{
WARN_LOG("ExtWrite\n");
// use local DISPFB, EXTDATA, EXTBUF, and PMODE
// int bpp, start, end;
// tex0Info texframe;
// bpp = 4;
// if( texframe.psm == 0x12 ) bpp = 3;
// else if( texframe.psm & 2 ) bpp = 2;
//
// // get the start and end addresses of the buffer
// GetRectMemAddress(start, end, texframe.psm, 0, 0, texframe.tw, texframe.th, texframe.tbp0, texframe.tbw);
}
////////////
// Caches //
////////////
#ifdef __x86_64__
extern "C" void TestClutChangeMMX(void* src, void* dst, int entries, void* pret);
#endif
// Decides whether the CLUT described by a register write differs from what
// is currently held in g_pbyGSClut.
//
// highdword: packed register bits; the fields decoded here are
//            cbp  = bits 5-18, cpsm = bits 19-22 (lowest bit masked off),
//            csm  = bit 23, csa = bits 24-28, cld = bits 29-31.
//            (presumably the upper 32 bits of a TEX0/TEX2 write - confirm
//            against the caller)
// psm:       texture pixel format; (psm&3)==3 selects 256 entries, else 16.
//
// Returns false when cld == 0, the cbp comparison result for cld 4/5, true
// for formats the fast compare does not handle (cpsm > 1 after masking, or
// csm set), and otherwise true only if the compared data differs.
bool ZeroGS::CheckChangeInClut(u32 highdword, u32 psm)
{
int cld = (highdword >> 29) & 0x7;
int cbp = ((highdword >> 5) & 0x3fff);
// processing the CLUT after tex0/2 are written
switch(cld) {
case 0: return false;
case 1: break; // Seems to rarely not be 1.
// not sure about changing cbp[0,1]
case 4: return gs.cbp[0] != cbp;
case 5: return gs.cbp[1] != cbp;
// default: load
default: break;
}
// the 0xe mask drops the lowest format bit, so values 0 and 1 both pass the test below
int cpsm = (highdword >> 19) & 0xe;
int csm = (highdword >> 23) & 0x1;
if( cpsm > 1 || csm )
// don't support 16bit for now
return true;
int csa = (highdword >> 24) & 0x1f;
int entries = (psm&3)==3 ? 256 : 16;
// src: CLUT data in GS memory (cbp counts 256-byte units); dst: cached copy, 64 bytes per csa step
u64* src = (u64*)(g_pbyGSMemory + cbp*256);
u64* dst = (u64*)(g_pbyGSClut+64*csa);
bool bRet = false;
#ifdef __x86_64__
// NOTE(review): the prototype names its parameters (src, dst) but dst is
// passed first here; this matches the 32-bit path below (edx = dst,
// ecx = src) only if the external routine expects that order - confirm.
TestClutChangeMMX(dst, src, entries, &bRet);
#else
int storeebx;
// do a fast test with MMX
__asm {
// ebx is saved here and restored after the Return label
mov storeebx, ebx
mov edx, dst
mov ecx, src
mov ebx, entries
Start:
// compare 64 bytes at edx with eight 8-byte pieces at ecx, read at
// offsets 0,16,32,48,8,24,40,56; all compare masks are ANDed into mm0
movq mm0, [edx]
movq mm1, [edx+8]
pcmpeqd mm0, [ecx]
pcmpeqd mm1, [ecx+16]
movq mm2, [edx+16]
movq mm3, [edx+24]
pcmpeqd mm2, [ecx+32]
pcmpeqd mm3, [ecx+48]
pand mm0, mm1
pand mm2, mm3
movq mm4, [edx+32]
movq mm5, [edx+40]
pcmpeqd mm4, [ecx+8]
pcmpeqd mm5, [ecx+24]
pand mm0, mm2
pand mm4, mm5
movq mm6, [edx+48]
movq mm7, [edx+56]
pcmpeqd mm6, [ecx+40]
pcmpeqd mm7, [ecx+56]
pand mm0, mm4
pand mm6, mm7
pand mm0, mm6
// eax == 0xff only when every compared dword matched
pmovmskb eax, mm0
cmp eax, 0xff
je Continue
mov bRet, 1
jmp Return
Continue:
// ebx holds the number of entries still to check (16 per pass)
cmp ebx, 16
jle Return
test ebx, 0x10
jz AddEcx
sub ecx, 448 // go back and down one column,
AddEcx:
add ecx, 256 // go to the right block
cmp ebx, 0x90
jne Continue1
add ecx, 256 // skip whole block
Continue1:
add edx, 64
sub ebx, 16
jmp Start
}
Return:
__asm {
// leave MMX state and put ebx back
emms
mov ebx, storeebx
}
#endif
return bRet;
}
// Reloads the CLUT buffer (g_pbyGSClut) from GS memory for context ctx,
// driven by that context's tex0 fields:
//   cld  - load control: 0 never loads, 1 always loads, 2/3 load and record
//          cbp in gs.cbp[0]/[1], 4/5 load only when cbp differs from
//          gs.cbp[0]/[1] (and then record it)
//   csm  - zero: handled by the WriteCLUT_*_CSM1 helpers; non-zero: entries
//          are fetched one at a time through getPixelAddress*_0 using
//          gs.clut.cou / cov / cbw
//   cpsm - format of the CLUT entries
//   csa  - offset of the first entry inside the CLUT buffer
// Clears s_bTexFlush on entry and does nothing else while the device is lost.
void ZeroGS::texClutWrite(int ctx)
{
    s_bTexFlush = 0;
    if( g_bIsLost )
        return;

    tex0Info& tex0 = vb[ctx].tex0;
    assert( PSMT_ISCLUT(tex0.psm) );

    // processing the CLUT after tex0/2 are written
    switch(tex0.cld) {
        case 0: return;
        case 1: break; // tex0.cld is usually 1.
        case 2: gs.cbp[0] = tex0.cbp; break;
        case 3: gs.cbp[1] = tex0.cbp; break;
        // not sure about changing cbp[0,1]
        case 4:
            if( gs.cbp[0] == tex0.cbp )
                return;
            gs.cbp[0] = tex0.cbp;
            break;
        case 5:
            if( gs.cbp[1] == tex0.cbp )
                return;
            gs.cbp[1] = tex0.cbp;
            break;
        default: //DEBUG_LOG("cld isn't 0-5!");
            break;
    }

    // flush the other context before the CLUT contents change
    Flush(!ctx);

    const int entries = (tex0.psm&3)==3 ? 256 : 16;

    if (tex0.csm)
    {
        switch (tex0.cpsm)
        {
            // 16bit psm
            // eggomania uses non16bit textures for csm2
            case PSMCT16:
            {
                u16* src = (u16*)g_pbyGSMemory + tex0.cbp*128;
                // 16-bit entries use every other u16 slot; csa >= 16 selects
                // the odd slots (+2 bytes)
                u16* dst = (u16*)(g_pbyGSClut+32*(tex0.csa&15)+(tex0.csa>=16?2:0));
                for (int i = 0; i < entries; ++i)
                {
                    *dst = src[getPixelAddress16_0(gs.clut.cou+i, gs.clut.cov, gs.clut.cbw)];
                    dst += 2;
                    // check for wrapping: on reaching a 0x400 boundary continue at
                    // the odd slots. size_t (not u32) keeps the pointer cast valid
                    // on 64-bit builds; only the low 10 bits are inspected.
                    // assumes g_pbyGSClut is 0x400-aligned - TODO confirm
                    if (((size_t)dst & 0x3ff) == 0) dst = (u16*)(g_pbyGSClut+2);
                }
                break;
            }
            case PSMCT16S:
            {
                u16* src = (u16*)g_pbyGSMemory + tex0.cbp*128;
                u16* dst = (u16*)(g_pbyGSClut+32*(tex0.csa&15)+(tex0.csa>=16?2:0));
                for (int i = 0; i < entries; ++i)
                {
                    *dst = src[getPixelAddress16S_0(gs.clut.cou+i, gs.clut.cov, gs.clut.cbw)];
                    dst += 2;
                    // check for wrapping (same scheme as the PSMCT16 case)
                    if (((size_t)dst & 0x3ff) == 0) dst = (u16*)(g_pbyGSClut+2);
                }
                break;
            }
            case PSMCT32:
            case PSMCT24:
            {
                u32* src = (u32*)g_pbyGSMemory + tex0.cbp*64;
                u32* dst = (u32*)(g_pbyGSClut+64*tex0.csa);
                // check if address exceeds src (only the last entry is tested;
                // the limit is 0x00100000 u32s past g_pbyGSMemory)
                if( src+getPixelAddress32_0(gs.clut.cou+entries-1, gs.clut.cov, gs.clut.cbw) >= (u32*)g_pbyGSMemory + 0x00100000 ) {
                    ERROR_LOG("texClutWrite out of bounds\n");
                }
                else {
                    for(int i = 0; i < entries; ++i)
                    {
                        *dst = src[getPixelAddress32_0(gs.clut.cou+i, gs.clut.cov, gs.clut.cbw)];
                        dst++;
                    }
                }
                break;
            }
            default:
            {
#ifndef RELEASE_TO_PUBLIC
                //DEBUG_LOG("unknown cpsm: %x (%x)\n", tex0.cpsm, tex0.psm);
#endif
                break;
            }
        }
    }
    else
    {
        switch (tex0.cpsm)
        {
            case PSMCT24:
            case PSMCT32:
                if( entries == 16 )
                    WriteCLUT_T32_I4_CSM1((u32*)(g_pbyGSMemory + tex0.cbp*256), (u32*)(g_pbyGSClut+64*tex0.csa));
                else
                    WriteCLUT_T32_I8_CSM1((u32*)(g_pbyGSMemory + tex0.cbp*256), (u32*)(g_pbyGSClut+64*tex0.csa));
                break;
            default:
                // every other cpsm value is treated as a 16-bit CLUT
                if( entries == 16 )
                    WriteCLUT_T16_I4_CSM1((u32*)(g_pbyGSMemory + 256 * tex0.cbp), (u32*)(g_pbyGSClut+32*(tex0.csa&15)+(tex0.csa>=16?2:0)));
                else // sse2 for 256 is more complicated, so use regular
                    WriteCLUT_T16_I8_CSM1_c((u32*)(g_pbyGSMemory + 256 * tex0.cbp), (u32*)(g_pbyGSClut+32*(tex0.csa&15)+(tex0.csa>=16?2:0)));
                break;
        }
    }
}
// Marks the textures as needing a flush and, unless s_bForceTexFlush is set,
// binds any pending "next" texture to its sampler (SAMP_MEMORY0 /
// SAMP_MEMORY1) right away, recording it as the current one.
void ZeroGS::SetTexFlush()
{
    s_bTexFlush = TRUE;

    // Disabled in the original as well:
    //  if( PSMT_ISCLUT(vb[0].tex0.psm) )
    //      texClutWrite(0);
    //  if( PSMT_ISCLUT(vb[1].tex0.psm) )
    //      texClutWrite(1);

    if( s_bForceTexFlush )
        return;

    // only touch the device for samplers whose texture actually changed
    if( s_ptexNextSet[0] != s_ptexCurSet[0] )
    {
        s_ptexCurSet[0] = s_ptexNextSet[0];
        pd3dDevice->SetTexture(SAMP_MEMORY0, s_ptexNextSet[0]);
    }

    if( s_ptexNextSet[1] != s_ptexCurSet[1] )
    {
        s_ptexCurSet[1] = s_ptexNextSet[1];
        pd3dDevice->SetTexture(SAMP_MEMORY1, s_ptexNextSet[1]);
    }
}
// Serialises the plugin state into pbydata.
//
// pbydata == NULL: size query - returns the number of bytes the caller
//                  should provide and writes nothing.
// otherwise:       resolves all render targets, writes the state and
//                  returns 0.
//
// Layout written, in order:
//   32-byte header (libraryName string at +0, ZEROGS_SAVEVER at +16)
//   int  icurctx
//   int  VBSAVELIMIT
//   0x00400000 bytes of g_pbyGSMemory
//   256*4 bytes of g_pbyGSClut
//   int  sizeof(gs), followed by gs itself
//   the first VBSAVELIMIT bytes of vb[0], then of vb[1]
int ZeroGS::Save(char* pbydata)
{
    if( pbydata == NULL ) {
        return 40 + 0x00400000 + sizeof(gs) + 2*VBSAVELIMIT + 2*sizeof(frameInfo) + 4 + 256*4;
    }

    // make sure GS memory holds the current render target contents
    s_RTs.ResolveAll();
    s_DepthRTs.ResolveAll();

    vb[0].Unlock();
    vb[1].Unlock();

    char* out = pbydata;

    // header
    strcpy(out, libraryName);
    *(u32*)(out+16) = ZEROGS_SAVEVER;
    out += 32;

    *(int*)out = icurctx;
    out += 4;
    *(int*)out = VBSAVELIMIT;
    out += 4;

    // raw memory images
    memcpy(out, g_pbyGSMemory, 0x00400000);
    out += 0x00400000;
    memcpy(out, g_pbyGSClut, 256*4);
    out += 256*4;

    // gs is stored with its size in front of it
    *(int*)out = sizeof(gs);
    out += 4;
    memcpy(out, &gs, sizeof(gs));
    out += sizeof(gs);

    // persistent part of both contexts
    for(int ctx = 0; ctx < 2; ++ctx, out += VBSAVELIMIT)
        memcpy(out, &vb[ctx], VBSAVELIMIT);

    vb[0].Lock();
    vb[1].Lock();

    return 0;
}
extern u32 s_uTex1Data[2][2], s_uClampData[2];
extern char *libraryName;
bool ZeroGS::Load(char* pbydata)
{
memset(s_uTex1Data, 0, sizeof(s_uTex1Data));
memset(s_uClampData, 0, sizeof(s_uClampData));
// first 32 bytes are the id
u32 savever = *(u32*)(pbydata+16);
if( strncmp(pbydata, libraryName, 6) == 0 && (savever == ZEROGS_SAVEVER || savever == 0xaa000004) ) {
g_MemTargs.Destroy();
GSStateReset();
pbydata += 32;
int context = *(int*)pbydata; pbydata += 4;
u32 savelimit = VBSAVELIMIT;
savelimit = *(u32*)pbydata; pbydata += 4;
memcpy(g_pbyGSMemory, pbydata, 0x00400000);
pbydata += 0x00400000;
memcpy(g_pbyGSClut, pbydata, 256*4);
pbydata += 256*4;
memset(&gs, 0, sizeof(gs));
int savedgssize;
if( savever == 0xaa000004 )
savedgssize = 0x1d0;
else {
savedgssize = *(int*)pbydata;
pbydata += 4;
}
memcpy(&gs, pbydata, savedgssize);
pbydata += savedgssize;
prim = &gs._prim[gs.prac];
LPD3DVB pvb = vb[0].pvb;
memcpy(&vb[0], pbydata, min(savelimit, VBSAVELIMIT));
pbydata += savelimit;
vb[0].pvb = pvb;
pvb = vb[1].pvb;
memcpy(&vb[1], pbydata, min(savelimit, VBSAVELIMIT));
pbydata += savelimit;
vb[1].pvb = pvb;
for(int i = 0; i < 2; ++i) {
vb[i].bNeedZCheck = vb[i].bNeedFrameCheck = 1;
vb[i].bSyncVars = 0; vb[i].bNeedTexCheck = 1;
memset(vb[i].uCurTex0Data, 0, sizeof(vb[i].uCurTex0Data));
}
icurctx = -1;
pd3dDevice->SetRenderTarget(0, psurfOrgTarg);
pd3dDevice->SetRenderTarget(1, NULL);
pd3dDevice->SetDepthStencilSurface(psurfOrgDepth);
SetFogColor(gs.fogcol);
vb[0].Lock();
vb[1].Lock();
return true;
}
return false;
}
// Requests a snapshot: raises g_bMakeSnapshot and records the target file
// name in strSnapshot. Nothing is written here.
// NOTE(review): the request is presumably picked up later by the render
// path - confirm where g_bMakeSnapshot is consumed.
void ZeroGS::SaveSnapshot(const char* filename)
{
g_bMakeSnapshot = 1;
strSnapshot = filename;
}
// AVI capture stuff
bool ZeroGS::StartCapture()
{
if( !s_aviinit ) {
START_AVI("zerogs.avi");
assert( s_ptexAVICapture == NULL );
if( FAILED(pd3dDevice->CreateOffscreenPlainSurface(width, height, D3DFMT_A8R8G8B8, D3DPOOL_SYSTEMMEM, &s_ptexAVICapture, NULL)) ) {
STOP_AVI();
ZeroGS::AddMessage("Failed to create avi");
return false;
}
s_aviinit = 1;
}
else {
DEBUG_LOG("ZeroGS: Continuing from previous capture");
}
s_avicapturing = 1;
return true;
}
// Pauses capturing by clearing s_avicapturing. s_aviinit and the capture
// surface are left untouched, so a later StartCapture() continues the same
// capture instead of opening a new one.
void ZeroGS::StopCapture()
{
s_avicapturing = 0;
}
void ZeroGS::CaptureFrame()
{
assert( s_avicapturing && s_aviinit && s_ptexAVICapture != NULL );
vector<BYTE> mem;
pd3dDevice->GetRenderTargetData(psurfOrgTarg, s_ptexAVICapture);
D3DLOCKED_RECT lock;
mem.resize(width * height * 4);
s_ptexAVICapture->LockRect(&lock, NULL, D3DLOCK_READONLY);
assert( lock.Pitch == width*4 );
BYTE* pend = (BYTE*)lock.pBits + (conf.height-1)*width*4;
for(int i = 0; i < conf.height; ++i) {
memcpy_amd(&mem[width*4*i], pend - width*4*i, width * 4);
}
s_ptexAVICapture->UnlockRect();
int fps = SMODE1->CMOD == 3 ? 50 : 60;
bool bSuccess = ADD_FRAME_FROM_DIB_TO_AVI("AAAA", fps, width, height, 32, &mem[0]);
if( !bSuccess ) {
s_avicapturing = 0;
STOP_AVI();
s_aviinit = 0;
ZeroGS::AddMessage("Failed to create avi");
return;
}
}