pcsx2/plugins/zzogl-pg/opengl/ZZoglFlush.cpp

2747 lines
71 KiB
C++

/* ZZ Open GL graphics plugin
* Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com
* Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
// Implementation of Flush -- the drawing function of the GS
#include <stdlib.h>
#include <math.h>
#include "GS.h"
#include "Mem.h"
#include "targets.h"
#include "ZZoglFlushHack.h"
#include "ZZoglShaders.h"
#include "ZZClut.h"
#include "ZZoglFlush.h"
//------------------ Defines
#ifdef ZEROGS_DEVBUILD
// Debugging switches that only exist in developer builds.
bool g_bUpdateEffect = false; // reload request; tested and cleared by FlushUpdateEffect()
bool g_bSaveTex = false; // saves the current texture
bool g_bSaveResolved = false;
#endif // defined(ZEROGS_DEVBUILD)
bool g_bSaveTrans = false;
bool s_bWriteDepth = false; // also used as an index offset when picking shaders (see FlushRendererStage)
bool s_bDestAlphaTest = false;
bool g_bUpdateStencil = true;
bool bCanRenderStencil = true;
// local alpha blending settings
GLenum s_rgbeq, s_alphaeq; // set by zgsBlendEquationSeparateEXT // ZZ
// Note: blendalpha[2] & blendinvalpha[2] are never used! Index 2 is changed to 0.
// Note: blendalpha[3] & blendinvalpha[3] are a special case for dest blending on 24 bits. FIXME: I was expecting GL_ONE & GL_ZERO! -- greg
static const u32 blendalpha[4] = { GL_SRC_ALPHA, GL_DST_ALPHA, GL_CONSTANT_COLOR_EXT, GL_SRC_ALPHA }; // ZZ
static const u32 blendinvalpha[4] = { GL_ONE_MINUS_SRC_ALPHA, GL_ONE_MINUS_DST_ALPHA, GL_ONE_MINUS_CONSTANT_COLOR_EXT, GL_ONE_MINUS_SRC_ALPHA }; //ZZ
// GL alpha comparison for each alpha-test mode; indexed by pixTest::atst.
static const u32 g_dwAlphaCmp[] = { GL_NEVER, GL_ALWAYS, GL_LESS, GL_LEQUAL, GL_EQUAL, GL_GEQUAL, GL_GREATER, GL_NOTEQUAL }; // ZZ
// used for the afail case: entry i is the logical negation of g_dwAlphaCmp[i]
static const u32 g_dwReverseAlphaCmp[] = { GL_ALWAYS, GL_NEVER, GL_GEQUAL, GL_GREATER, GL_NOTEQUAL, GL_LESS, GL_LEQUAL, GL_EQUAL };
// GL depth comparison for each depth-test mode; indexed by pixTest::ztst.
static const u32 g_dwZCmp[] = { GL_NEVER, GL_ALWAYS, GL_GEQUAL, GL_GREATER };
/////////////////////
// graphics resources
#define s_bForceTexFlush 1 // ZZ
static u32 s_ptexCurSet[2] = {0}; // per-context texture id; written by FlushApplyResizeFilter
static u32 s_ptexNextSet[2] = {0}; // ZZ
int s_ClutResolve = 0; // ZZ -- counts the resolves done for PSMT4 targets in FlushGetTarget
int g_nDepthUsed = 0; // ffx2 pal movies
int s_nWriteDepthCount = 0; // ZZ
int s_nWriteDestAlphaTest = 0; // ZZ
int s_nWireframeCount = 0; // wireframe mode is only toggled while this is > 0
////////////////////
// State parameters
static float4 vAlphaBlendColor; // used for GPU_COLOR
static bool bNeedBlendFactorInAlpha; // set if the output source alpha is different from the real source alpha (only when blend factor > 0x80)
static u32 s_dwColorWrite = 0xf; // the color write mask of the current target
// g_flag_vars g_vars;
//static alphaInfo s_alphaInfo; // ZZ
int g_PrevBitwiseTexX = -1, g_PrevBitwiseTexY = -1; // textures stored in SAMP_BITWISEANDX and SAMP_BITWISEANDY // ZZ
float fiTexWidth[2], fiTexHeight[2]; // current tex width and height
Point AA = {0,0}; // if AA.y is set, then AA.x has to be set.
VB vb[2]; // one vertex buffer per drawing context
int icurctx = -1; // context of the most recent flush (set in FlushDoContextJob)
void Draw(const VB& curvb)
{
#ifdef GLSL4_API
ZZshSetupShader();
#endif
glDrawArrays(primtype[curvb.curprim.prim], 0, curvb.nCount);
}
// Convert an alpha reference value into the float passed to glAlphaFunc:
// aref / 127.5, clamped so that the result never exceeds 1.0.
inline float AlphaReferedValue(int aref)
{
    // return (b2XAlphaTest) ? min(1.0f, (float)aref / 127.5f) : (float)aref / 255.0f ;
    const float scaled = (float)aref / 127.5f;
    return (scaled < 1.0f) ? scaled : 1.0f;
}
// Configure the GL alpha test from the given pixel-test settings.
// The test is switched off only when ate is clear, USEALPHATESTING is set and
// the current color write mask is neither 2 nor 14; keeping it on for those
// masks fixes GOW and Okami.
inline void SetAlphaTest(const pixTest& curtest)
{
    const bool maskKeepsAlphaTest = (s_dwColorWrite == 2 || s_dwColorWrite == 14);
    const bool switchOff = !curtest.ate && USEALPHATESTING && !maskKeepsAlphaTest;

    if (switchOff)
    {
        glDisable(GL_ALPHA_TEST);
        return;
    }

    glEnable(GL_ALPHA_TEST);
    glAlphaFunc(g_dwAlphaCmp[curtest.atst], AlphaReferedValue(curtest.aref));
}
// Tell whether tcc, aem and the pixel storage mode together require alpha
// expansion handling. tcc == 0 means the texture alpha is not used; aem is the
// alpha-expansion flag from TEXA (the original author was not sure this use of
// it is correct); the storage format must be one accepted by PSMT_ALPHAEXP.
inline bool IsAlphaTestExpansion(tex0Info tex0)
{
    if (!tex0.tcc) return false;
    if (!gs.texa.aem) return false;

    return PSMT_ALPHAEXP(PIXEL_STORAGE_FORMAT(tex0));
}
// Temporarily switch back to filled polygons while wireframe mode is
// configured and the wireframe counter is still positive, so that the first
// flushes draw a few solid primitives.
inline void SwitchWireframeOff()
{
    if (!conf.wireframe()) return;

    if (s_nWireframeCount > 0)
        glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
}
// Counterpart of SwitchWireframeOff(): restore line rendering and consume one
// unit of the wireframe counter. Does nothing unless wireframe mode is
// configured and the counter is positive.
inline void SwitchWireframeOn()
{
    if (!conf.wireframe()) return;
    if (s_nWireframeCount <= 0) return;

    glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
    --s_nWireframeCount;
}
extern u32 ptexBilinearBlocks;
// Decide whether the texture should be sampled with the filtered shader
// variant. Returns 1 when it should, 0 otherwise.
//  - conf.bilinear == 2 always forces 1.
//  - With bilinear disabled, or ptexBilinearBlocks == 0, the result is 0.
//  - Otherwise a mode is derived from tex1.mmin / tex1.mmag and the result is 1
//    only for the modes 1, 4 and 5.
int GetTexFilter(const tex1Info& tex1)
{
    // always force
    if (conf.bilinear == 2) return 1;

    if (!conf.bilinear || ptexBilinearBlocks == 0) return 0;

    int mode;
    if (tex1.mmin <= 1)
        mode = tex1.mmin | tex1.mmag;
    else if (tex1.mmag)
        mode = (tex1.mmin + 2) & 5;
    else
        mode = tex1.mmin;

    switch (mode)
    {
        case 1:
        case 4:
        case 5:
            return 1;
        default:
            return 0;
    }
}
// Developer builds only (ZEROGS_DEVBUILD): release every program held in
// ppsTexture, zero the whole table and load the extra effects again.
// In other builds this function does nothing.
void ReloadEffects()
{
#ifdef ZEROGS_DEVBUILD
    const u32 shaderCount = ArraySize(ppsTexture);
    for (u32 idx = 0; idx < shaderCount; ++idx)
    {
        SAFE_RELEASE_PROG(ppsTexture[idx].prog);
    }

    memset(ppsTexture, 0, sizeof(ppsTexture));
    ZZshLoadExtraEffects();
#endif
}
long BufferNumber = 0;
// Debug helper, active only when both WRITE_PRIM_LOGS and _DEBUG are defined.
// Logs the state of the given context's vertex buffer (targets, prim, test,
// alpha and tex0 registers), saves the current texture through NamedSaveTex
// and logs the returned file name together with the running buffer number.
inline void VisualBufferMessage(int context)
{
#if defined(WRITE_PRIM_LOGS) && defined(_DEBUG)
    BufferNumber++;
    VB& curvb = vb[context];

    static const char* alphaTestNames[8] = { "NEVER", "ALWAYS", "LESS", "LEQUAL", "EQUAL", "GEQUAL", "GREATER", "NOTEQUAL"};
    static const char* depthTestNames[4] = { "NEVER", "ALWAYS", "GEQUAL", "GREATER" };
    static const char* alphaFailNames[4] = { "KEEP", "FB_ONLY", "ZB_ONLY", "RGB_ONLY" };

    ZZLog::Debug_Log("**Drawing ctx %d, num %d, fbp: 0x%x, zbp: 0x%x, fpsm: %d, zpsm: %d, fbw: %d",
                     context, curvb.nCount, curvb.prndr->fbp, curvb.zbuf.zbp,
                     curvb.prndr->psm, curvb.zbuf.psm, curvb.prndr->fbw);

    ZZLog::Debug_Log("prim: prim=%x iip=%x tme=%x fge=%x abe=%x aa1=%x fst=%x ctxt=%x fix=%x",
                     curvb.curprim.prim, curvb.curprim.iip, curvb.curprim.tme,
                     curvb.curprim.fge, curvb.curprim.abe, curvb.curprim.aa1,
                     curvb.curprim.fst, curvb.curprim.ctxt, curvb.curprim.fix);

    ZZLog::Debug_Log("test: ate:%d, atst: %s, aref: %d, afail: %s, date: %d, datm: %d, zte: %d, ztst: %s, fba: %d",
                     curvb.test.ate, alphaTestNames[curvb.test.atst], curvb.test.aref,
                     alphaFailNames[curvb.test.afail], curvb.test.date, curvb.test.datm,
                     curvb.test.zte, depthTestNames[curvb.test.ztst], curvb.fba.fba);

    ZZLog::Debug_Log("alpha: A%d B%d C%d D%d FIX:%d pabe: %d; aem: %d, ta0: %d, ta1: %d\n",
                     curvb.alpha.a, curvb.alpha.b, curvb.alpha.c, curvb.alpha.d, curvb.alpha.fix,
                     gs.pabe, gs.texa.aem, gs.texa.ta[0], gs.texa.ta[1]);

    ZZLog::Debug_Log("tex0: tbp0=0x%x, tbw=%d, psm=0x%x, tw=%d, th=%d, tcc=%d, tfx=%d, cbp=0x%x, cpsm=0x%x, csm=%d, csa=%d, cld=%d",
                     curvb.tex0.tbp0, curvb.tex0.tbw, curvb.tex0.psm, curvb.tex0.tw,
                     curvb.tex0.th, curvb.tex0.tcc, curvb.tex0.tfx, curvb.tex0.cbp,
                     curvb.tex0.cpsm, curvb.tex0.csm, curvb.tex0.csa, curvb.tex0.cld);

    // The returned name is released with free() once it has been logged.
    char* tgaName = NamedSaveTex(&curvb.tex0, 1);
    ZZLog::Error_Log("TGA name '%s'.", tgaName);
    free(tgaName);

    ZZLog::Debug_Log("buffer %ld.\n", BufferNumber);
#endif
}
// Debug hook intended to dump the current render target to a TGA file.
// The implementation is entirely commented out, so this is currently a no-op
// in every build; curvb is unused.
inline void SaveRendererTarget(VB& curvb)
{
#ifdef _DEBUG
// Needs a # after rndr to work...
// char str[255];
// sprintf(str, "rndr.tga");
// SaveRenderTarget(str, curvb.prndr->fbw, curvb.prndr->fbh, 0);
#endif
}
// Developer builds only: reload the shader effects when a reload was requested
// through g_bUpdateEffect, then clear the request.
// The guard is ZEROGS_DEVBUILD, the same macro that guards the definition of
// g_bUpdateEffect and the body of ReloadEffects(); with a different macro the
// request flag could be set but never honoured, or referenced while undeclared.
// In all other builds this function does nothing.
inline void FlushUpdateEffect()
{
#ifdef ZEROGS_DEVBUILD
    if (g_bUpdateEffect)
    {
        ReloadEffects();
        g_bUpdateEffect = false; // one-shot request
    }
#endif
}
// Decide whether the flush can be skipped entirely. That is the case when the
// buffer holds no vertices, when the depth test is enabled with ztst == 0, or
// when IsBadFrame() rejects the buffer. On a skip the vertex count is reset
// to zero and true is returned.
inline bool IsFlushNoNeed(VB& curvb, const pixTest& curtest)
{
    // Checks are evaluated lazily, in the same order as a chained ||.
    bool skip = (curvb.nCount == 0);
    if (!skip) skip = (curtest.zte && curtest.ztst == 0);
    if (!skip) skip = IsBadFrame(curvb);

    if (!skip) return false;

    curvb.nCount = 0;
    return true;
}
// Flush pending transfer ranges, if the range manager holds any.
// The texture of the current primitive is passed along when texturing is on.
inline void FlushTransferRangesHelper(VB& curvb)
{
    if (s_RangeMngr.ranges.size() == 0) return;

    // Hide the queued vertices while transferring so that the transfer cannot
    // re-enter the flush for this buffer (would loop forever otherwise).
    const u32 savedCount = curvb.nCount;
    curvb.nCount = 0;

    FlushTransferRanges(curvb.curprim.tme ? &curvb.tex0 : NULL);

    curvb.nCount += savedCount;
}
// Run the pending frame/Z check and the pending texture-data flush.
// Returns true when the texture flush left the buffer without vertices, i.e.
// there is nothing left to draw.
inline bool FushTexDataHelper(VB& curvb)
{
    const bool frameOrZPending = curvb.bNeedFrameCheck || curvb.bNeedZCheck;
    if (frameOrZPending)
    {
        curvb.CheckFrame(curvb.curprim.tme ? curvb.tex0.tbp0 : 0);
    }

    // Zeydlitz want to try this
    if (!curvb.bNeedTexCheck) return false;

    curvb.FlushTexData();
    return (curvb.nCount == 0);
}
// A missing render or depth target means something went badly wrong earlier.
// In that case the problem is logged, the queued vertices are dropped and true
// is returned so that the caller abandons the flush.
inline bool FlushCheckForNULLTarget(VB& curvb, int context)
{
    const bool targetsPresent = (curvb.prndr != NULL) && (curvb.pdepth != NULL);
    if (targetsPresent) return false;

    ERROR_LOG_SPAMA("Current render target NULL (ctx: %d)", context);
    curvb.nCount = 0;
    return true;
}
// Preliminary work done before the real flush. In order: effect reload
// (developer builds), the skip test, pending transfer ranges, the frame/Z and
// texture checks, and finally the NULL-target check.
// Returns true when the flush must be abandoned.
inline bool FlushInitialTest(VB& curvb, const pixTest& curtest, int context)
{
    GL_REPORT_ERRORD();
    assert(context >= 0 && context <= 1);

    FlushUpdateEffect();

    if (IsFlushNoNeed(curvb, curtest))
    {
        return true;
    }

    FlushTransferRangesHelper(curvb);

    const bool nothingLeft = FushTexDataHelper(curvb);
    if (nothingLeft)
    {
        return true;
    }

    GL_REPORT_ERRORD();

    return FlushCheckForNULLTarget(curvb, context);
}
// Debug-only (_DEBUG) tracing of the texture target lookup. Only lookups with
// tbp0 == 0x3600 and tbw == 0x100 are logged: a hit logs the lookup itself, a
// miss additionally lists every known render and depth target plus the
// current buffer's frame/target values.
inline void TargetLog(int& tbw, int& tbp0, int& tpsm, VB& curvb, bool miss)
{
#ifdef _DEBUG
    if (tbp0 != 0x3600 || tbw != 0x100) return;

    if (!miss)
    {
        ZZLog::Debug_Log("Hit %x 0x%x %x", tbw, tbp0, tpsm);
        return;
    }

    ZZLog::Debug_Log("Miss %x 0x%x %d", tbw, tbp0, tpsm);

    typedef map<u32, CRenderTarget*> MAPTARGETS;

    for (MAPTARGETS::iterator rt = s_RTs.mapTargets.begin(); rt != s_RTs.mapTargets.end(); ++rt)
    {
        ZZLog::Debug_Log("\tRender %x 0x%x %x", rt->second->fbw, rt->second->fbp, rt->second->psm);
    }

    for (MAPTARGETS::iterator dt = s_DepthRTs.mapTargets.begin(); dt != s_DepthRTs.mapTargets.end(); ++dt)
    {
        ZZLog::Debug_Log("\tDepth %x 0x%x %x", dt->second->fbw, dt->second->fbp, dt->second->psm);
    }

    ZZLog::Debug_Log("\tCurvb 0x%x 0x%x 0x%x %x", curvb.frame.fbp, curvb.prndr->end, curvb.prndr->fbp, curvb.prndr->fbw);
#endif
}
// Fallback lookup used when the direct render-target lookup for a texture
// failed. Tries, in order:
//   1. the depth targets, when tpsm is a Z texture format;
//   2. (only with the "reget" setting and PSMT8) targets with half the width,
//      retrying with tbp0 aligned down to 0x800 and then 0x1000 blocks;
//   3. (only with the "texture_targs" setting) the current render target, when
//      the texture address lies inside it.
// tbp0 is taken by reference and IS modified by the alignment in step 2.
// Returns the target found, or NULL.
inline CRenderTarget* FlushReGetTarget(int& tbw, int& tbp0, int& tpsm, VB& curvb)
{
// This was incorrect code
CRenderTarget* ptextarg = NULL;
if (PSMT_ISZTEX(tpsm))
{
// try depth
ptextarg = s_DepthRTs.GetTarg(tbp0, tbw);
}
// I wonder if either of these hacks are useful, or if I can just remove them?
if ((conf.settings().reget) && (tpsm == PSMT8))
{
// check for targets with half the width. Breaks Valkyrie Chronicles
ptextarg = s_RTs.GetTarg(tbp0, tbw / 2);
if (ptextarg == NULL)
{
// retry with the address aligned down (clears the low 11 bits)
tbp0 &= ~0x7ff;
ptextarg = s_RTs.GetTarg(tbp0, tbw / 2); // mgs3 hack
if (ptextarg == NULL)
{
// check the next level (mgs3): clears the low 12 bits
tbp0 &= ~0xfff;
ptextarg = s_RTs.GetTarg(tbp0, tbw / 2); // mgs3 hack
}
if (ptextarg != NULL && ptextarg->start > tbp0*256)
{
// target beyond range, so ignore
ptextarg = NULL;
}
}
}
if ((conf.settings().texture_targs) && (ptextarg == NULL))
{
// check if any part of the texture intersects the current target
// (note: this uses curvb.tex0.tbp0, not the possibly re-aligned tbp0 argument)
if (!PSMT_ISCLUT(tpsm) && (curvb.tex0.tbp0 >= curvb.frame.fbp) && ((curvb.tex0.tbp0) < curvb.prndr->end))
{
ptextarg = curvb.prndr;
}
}
// debug-only trace of the outcome
TargetLog(tbw, tbp0, tpsm, curvb, (ptextarg == NULL));
return ptextarg;
}
// Find the render target, if any, that should be used as the source texture
// for this draw.
// Returns NULL when texturing is off, when no target matches, when the
// matching target still needs an update, or when the texture is a format with
// PSMT_BITMODE == 4 (those are read from memory instead).
// When the texture target is the buffer's own render target, a feedback copy
// is created and render target slot 1 is rebound.
inline CRenderTarget* FlushGetTarget(VB& curvb)
{
int tbw, tbp0, tpsm;
CRenderTarget* ptextarg = NULL;
if (!curvb.curprim.tme) return ptextarg; // Which would be NULL, currently.
if (curvb.bNeedTexCheck)
{
ZZLog::Error_Log("How it is possible?");
// not yet initialised, but still need to get the correct target! (xeno3 ingame)
// take the texture parameters from the pending TEX0 data instead of tex0
tbp0 = ZZOglGet_tbp0_TexBits(curvb.uNextTex0Data[0]);
tbw = ZZOglGet_tbw_TexBitsMult(curvb.uNextTex0Data[0]);
tpsm = ZZOglGet_psm_TexBitsFix(curvb.uNextTex0Data[0]);
}
else
{
tbw = curvb.tex0.tbw;
tbp0 = curvb.tex0.tbp0;
tpsm = curvb.tex0.psm;
}
// direct lookup first, then the fallback heuristics (which may modify tbp0)
ptextarg = s_RTs.GetTarg(tbp0, tbw);
if (ptextarg == NULL) ptextarg = FlushReGetTarget(tbw, tbp0, tpsm, curvb);
if ((ptextarg != NULL) && !(ptextarg->status & CRenderTarget::TS_NeedUpdate))
{
if (PSMT_BITMODE(tpsm) == 4) // handle 8h cluts
{
// don't support clut targets, read from mem
// 4hl - kh2 check - from dx version -- arcum42
if (tpsm == PSMT4 && s_ClutResolve <= 1)
{
// xenosaga requires 2 resolves
// nCount is zeroed around the resolve and restored afterwards
u32 prevcount = curvb.nCount;
curvb.nCount = 0;
ptextarg->Resolve();
s_ClutResolve++;
curvb.nCount += prevcount;
}
ptextarg = NULL;
}
else
{
if (ptextarg == curvb.prndr)
{
// need feedback
curvb.prndr->CreateFeedback();
if (s_bWriteDepth && (curvb.pdepth != NULL))
curvb.pdepth->SetRenderTarget(1);
else
ResetRenderTarget(1);
}
}
}
else
{
// No target was found, or the one found still needs updating: do not use it.
ptextarg = NULL;
}
return ptextarg;
}
// Make sure the render and depth targets of the given context are bound and
// up to date, then clear their status flags.
// SetContextTarget() is called when the targets were never set for this
// buffer (bVarsSetTarg clear) or when either target still carries
// TS_NeedUpdate. A virtual depth target is promoted when depth writes are
// enabled (zmsk clear); otherwise only its TS_NeedUpdate flag is dropped.
inline void FlushSetContextTarget(VB& curvb, int context)
{
if (!curvb.bVarsSetTarg)
{
SetContextTarget(context);
}
else
{
assert(curvb.pdepth != NULL);
if (curvb.pdepth->status & CRenderTarget::TS_Virtual)
{
if (!curvb.zbuf.zmsk)
{
// ptemp is only used by the assert below
CRenderTarget* ptemp = s_DepthRTs.Promote(GetFrameKey(curvb.pdepth));
assert(ptemp == curvb.pdepth);
}
else
{
curvb.pdepth->status &= ~CRenderTarget::TS_NeedUpdate;
}
}
if ((curvb.pdepth->status & CRenderTarget::TS_NeedUpdate) || (curvb.prndr->status & CRenderTarget::TS_NeedUpdate))
SetContextTarget(context);
}
// from here on the render target is expected to be current
assert(!(curvb.prndr->status&CRenderTarget::TS_NeedUpdate));
curvb.prndr->status = 0;
if (curvb.pdepth != NULL)
{
#ifdef _DEBUG
// Reduce an assert to a warning.
if (curvb.pdepth->status & CRenderTarget::TS_NeedUpdate)
{
ZZLog::Debug_Log("In FlushSetContextTarget, pdepth has TS_NeedUpdate set.");
}
#endif
// the depth status is only cleared when depth writes are enabled
if (!curvb.zbuf.zmsk)
{
assert(!(curvb.pdepth->status & CRenderTarget::TS_Virtual));
curvb.pdepth->status = 0;
}
}
}
// Upload the queued vertices into the current VBO of the g_vboBuffers ring,
// advance the ring index for the next flush and set up the vertex stream.
inline void FlushSetStream(VB& curvb)
{
    const GLuint vbo = g_vboBuffers[g_nCurVBOIndex];
    const GLsizeiptr byteCount = curvb.nCount * sizeof(VertexGPU);

    // setup current buffer
    glBindBuffer(GL_ARRAY_BUFFER, vbo);
    glBufferData(GL_ARRAY_BUFFER, byteCount, curvb.pBufferData, GL_STREAM_DRAW);

    // rotate to the next buffer of the ring
    g_nCurVBOIndex = (g_nCurVBOIndex + 1) % ArraySize(g_vboBuffers);

    // Alternative upload path, kept for reference:
    // void* pdata = glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
    // memcpy_amd(pdata, curvb.pBufferData, curvb.nCount * sizeof(VertexGPU));
    // glUnmapBuffer(GL_ARRAY_BUFFER);

    SET_STREAM();
    GL_REPORT_ERRORD();
}
// Compute and apply the color write mask (s_dwColorWrite) for this draw.
// Only whole channels can be masked: a channel is disabled when its 8-bit
// frame-buffer mask is 0xff. Alpha writes are also disabled when the target
// has PSMT_BITMODE == 1, and when blending is on together with an alpha test
// of ATST_LESS against 128. This is coarser than the PS2's per-bit mask.
inline void FlushSetColorMask(VB& curvb)
{
    u32 writeMask = 0xf;
    if (PSMT_BITMODE(curvb.prndr->psm) == 1)
        writeMask = (COLORMASK_BLUE | COLORMASK_GREEN | COLORMASK_RED);

    const int redBits = ZZOglGet_fbmRed_FrameBits(curvb.frame.fbm);
    const int greenBits = ZZOglGet_fbmGreen_FrameBits(curvb.frame.fbm);
    const int blueBits = ZZOglGet_fbmBlue_FrameBits(curvb.frame.fbm);
    const int alphaBits = ZZOglGet_fbmAlpha_FrameBits(curvb.frame.fbm);

    if (redBits == 0xff) writeMask &= ~COLORMASK_RED;
    if (greenBits == 0xff) writeMask &= ~COLORMASK_GREEN;
    if (blueBits == 0xff) writeMask &= ~COLORMASK_BLUE;

    const bool blendedLessThan128 =
        curvb.curprim.abe && (curvb.test.atst == ATST_LESS && curvb.test.aref == 128);

    if ((alphaBits == 0xff) || blendedLessThan128)
        writeMask &= ~COLORMASK_ALPHA;

    s_dwColorWrite = writeMask;
    GL_COLORMASK(s_dwColorWrite);
}
// Apply the scissor rectangle stored in the current render target.
inline void FlushSetScissorRect(VB& curvb)
{
    glScissor(curvb.prndr->scissorrect.x,
              curvb.prndr->scissorrect.y,
              curvb.prndr->scissorrect.w,
              curvb.prndr->scissorrect.h);
}
// Per-context setup performed before any drawing happens: bind/refresh the
// targets, record the active context in icurctx, upload the vertex stream and
// apply the color mask and scissor rectangle.
inline void FlushDoContextJob(VB& curvb, int context)
{
// debug hook; currently a no-op
SaveRendererTarget(curvb);
FlushSetContextTarget(curvb, context);
icurctx = context;
FlushSetStream(curvb);
FlushSetColorMask(curvb);
FlushSetScissorRect(curvb);
}
// Returns 1 when the alpha test is an enabled EQUAL comparison against a
// reference of at most 128, in which case alpha has to be processed with care
// by the shaders. Always 0 when the reduced shader set (SHADER_REDUCED) is in
// use.
inline int FlushGetExactcolor(const pixTest curtest)
{
    if (g_nPixelShaderVer & SHADER_REDUCED) return 0;

    // ffx2 breaks when ==7
    const bool exactEqualTest =
        (curtest.ate && curtest.aref <= 128) && (curtest.atst == ATST_EQUAL); // || curtest.atst == ATST_NOTEQUAL);
    return exactEqualTest;
}
// Create a temporary 256x1 GL texture holding the decoded CLUT of the current
// texture and return its id through ptexclut. The texture id is appended to
// s_vecTempTextures. If glGenTextures yields 0, nothing else is done.
inline void FlushDecodeClut(VB& curvb, GLuint& ptexclut)
{
    glGenTextures(1, &ptexclut);
    glBindTexture(GL_TEXTURE_2D, ptexclut);

    if (ptexclut == 0) return;

    const int entries = PSMT_IS8CLUT(curvb.tex0.psm) ? 256 : 16;

    if (curvb.tex0.csm && curvb.tex0.csa) ZZLog::Debug_Log("ERROR, csm1.");

    if (PSMT_IS32BIT(curvb.tex0.cpsm))
    {
        // 32-bit palette entries
        __aligned16 u32 data[256];
        const int clutsize = min(entries, 256 - curvb.tex0.csa * 16) * 4;
        ClutBuffer_to_Array<u32>((u32*)data, curvb.tex0.csa, clutsize);
        Texture2D(4, 256, 1, GL_RGBA, GL_UNSIGNED_BYTE, data);
    }
    else
    {
        // 16-bit palette entries
        __aligned16 u16 data[256];
        const int clutsize = min(entries, 512 - curvb.tex0.csa * 16) * 2;
        ClutBuffer_to_Array<u16>((u16*)data, curvb.tex0.csa, clutsize);
        Texture2D(4, 256, 1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1, data);
    }

    s_vecTempTextures.push_back(ptexclut);

    if (g_bSaveTex) SaveTexture("clut.tga", GL_TEXTURE_2D, ptexclut, 256, 1);

    setTex2DWrap(GL_REPEAT);
    setTex2DFilters(GL_LINEAR);
}
// Pick the shader type used to sample an existing render target as a texture.
//   CLUT texture (unless the no_target_clut setting is on): the CLUT is decoded
//     into ptexclut; the result is 4 for a half (16-bit) target with the full
//     shader set, otherwise 2.
//   Texture and target differ in "half" storage (and either the full shader
//     set is in use or fog is off): 3 when the texture is the half one,
//     0 otherwise.
//   Everything else: 1.
inline int FlushGetShaderType(VB& curvb, CRenderTarget* ptextarg, GLuint& ptexclut)
{
    if (PSMT_ISCLUT(curvb.tex0.psm) && !(conf.settings().no_target_clut))
    {
        FlushDecodeClut(curvb, ptexclut);

        if (!(g_nPixelShaderVer&SHADER_REDUCED) && PSMT_ISHALF(ptextarg->psm))
            return 4;

        // Valkyrie
        return 2;
    }

    if (PSMT_ISHALF_STORAGE(curvb.tex0) != PSMT_ISHALF(ptextarg->psm) && (!(g_nPixelShaderVer&SHADER_REDUCED) || !curvb.curprim.fge))
    {
        // texture is half: converting from 32->16 (Radiata Chronicles)
        // otherwise:       converting from 16->32 (Star Ward: Force)
        return PSMT_ISHALF_STORAGE(curvb.tex0) ? 3 : 0;
    }

    return 1;
}
// Upload the g_fPageOffset shader constant, whose value depends on the
// shader type:
//   type 3: small negative offsets (x, y) plus one scaled by the target height (z)
//   type 4: (2, 1, 0, 0.0001)
//   others: all zero
// For Z-texture targets w is forced to -1 (zoe2).
// Every component is set explicitly before the switch so that no
// indeterminate value is ever sent to the shader for the "other" types.
inline void FlushSetPageOffset(FRAGMENTSHADER* pfragment, int shadertype, CRenderTarget* ptextarg)
{
    SetShaderCaller("FlushSetPageOffset");

    float4 vpageoffset;
    vpageoffset.x = 0.0f;
    vpageoffset.y = 0.0f;
    vpageoffset.z = 0.0f;
    vpageoffset.w = 0.0f;

    switch (shadertype)
    {
        case 3:
            vpageoffset.x = -0.1f / 256.0f;
            vpageoffset.y = -0.001f / 256.0f;
            vpageoffset.z = -0.1f / (ptextarg->fbh);
            vpageoffset.w = 0.0f;
            break;

        case 4:
            vpageoffset.x = 2;
            vpageoffset.y = 1;
            vpageoffset.z = 0;
            vpageoffset.w = 0.0001f;
            break;

        default:
            break;
    }

    // zoe2
    if (PSMT_ISZTEX(ptextarg->psm)) vpageoffset.w = -1.0f;

    ZZshSetParameter4fv(pfragment->prog, pfragment->fPageOffset, vpageoffset, "g_fPageOffset");
}
// Upload the g_fTexOffset shader constant for the shader types that use it.
//   type 3: offsets derived from the texture size (tex0.tw / tex0.th)
//   type 4: offsets derived from the target size (fbw / fbh)
// Other shader types leave the constant untouched.
// A single local vector is used; the previous version declared an unused
// outer vector that was shadowed by one declaration per branch.
inline void FlushSetTexOffset(FRAGMENTSHADER* pfragment, int shadertype, VB& curvb, CRenderTarget* ptextarg)
{
    SetShaderCaller("FlushSetTexOffset");

    if (shadertype != 3 && shadertype != 4) return;

    float4 v;

    if (shadertype == 3)
    {
        v.x = 16.0f / (float)curvb.tex0.tw;
        v.y = 16.0f / (float)curvb.tex0.th;
        v.z = 0.5f * v.x;
        v.w = 0.5f * v.y;
    }
    else
    {
        v.x = 16.0f / (float)ptextarg->fbw;
        v.y = 16.0f / (float)ptextarg->fbh;
        v.z = -1;
        v.w = 8.0f / (float)ptextarg->fbh;
    }

    ZZshSetParameter4fv(pfragment->prog, pfragment->fTexOffset, v, "g_fTexOffset");
}
// Upload the g_fTexDims shader constant: x/y carry the real texture
// dimensions, z/w the pixel offset of the texture start (tbp0) inside the
// render target. The offset is zero when tbp0 does not lie beyond the target's
// fbp. Shader type 4 adds 8 to the x offset.
inline void FlushTextureDims(FRAGMENTSHADER* pfragment, int shadertype, VB& curvb, CRenderTarget* ptextarg)
{
    SetShaderCaller("FlushTextureDims");

    float4 vTexDims;
    vTexDims.x = (float)RW(curvb.tex0.tw) ;
    vTexDims.y = (float)RH(curvb.tex0.th) ;

    // default: texture starts at (or before) the target
    vTexDims.z = 0;//-0.5f/(float)ptextarg->fbw;
    vTexDims.w = 0;//0.2f/(float)ptextarg->fbh;

    // look at the offset of tbp0 from fbp
    if (curvb.tex0.tbp0 > ptextarg->fbp)
    {
        //u32 tbp0 = curvb.tex0.tbp0 >> 5; // align to a page
        const int rowHeight = PSMT_ISHALF(ptextarg->psm) ? 64 : 32;
        const int offsetY = ((curvb.tex0.tbp0 - ptextarg->fbp) / (32 * (ptextarg->fbw >> 6))) * rowHeight;
        const int offsetX = (((curvb.tex0.tbp0 - ptextarg->fbp) % (32 * (ptextarg -> fbw >> 6)))) * 2;

        vTexDims.z = (float)offsetX;
        vTexDims.w = (float)offsetY;
    }

    if (shadertype == 4)
    {
        vTexDims.z += 8.0f;
    }

    ZZshSetParameter4fv(pfragment->prog, pfragment->fTexDims, vTexDims, "g_fTexDims");
}
// Apply the TEX1 magnification/minification filters to the target texture.
// Only the NEAREST setting (value 0) is honoured; any other value is ignored.
// The texture actually sampled (the feedback copy when the target is the
// current render target) is recorded in s_ptexCurSet[context].
// dwFilterOpts receives bit 0 when the MAG filter was changed and bit 1 when
// the MIN filter was changed, so the change can be undone later.
inline void FlushApplyResizeFilter(VB& curvb, u32& dwFilterOpts, CRenderTarget* ptextarg, int context)
{
    const u32 sampledTex = (ptextarg == curvb.prndr) ? ptextarg->ptexFeedback : ptextarg->ptex;
    s_ptexCurSet[context] = sampledTex;

    const bool nearestMag = !curvb.tex1.mmag;
    const bool nearestMin = !curvb.tex1.mmin;

    // bind only when at least one parameter will be changed
    if (nearestMag || nearestMin)
        glBindTexture(GL_TEXTURE_RECTANGLE_NV, sampledTex);

    if (nearestMag)
    {
        glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
        dwFilterOpts |= 1;
    }

    if (nearestMin)
    {
        glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
        dwFilterOpts |= 2;
    }
}
// Build the fragment shader setup for sampling an existing render target as a
// texture. This involves several tricks (32<->16 bit conversion, CLUT
// decoding), so the shader type is chosen first and the page offset, texture
// offset, texture dimensions, CLUT sampler and resize filter are then set.
// Returns the fragment shader loaded by ZZshLoadShadeEffect.
inline FRAGMENTSHADER* FlushUseExistRenderTarget(VB& curvb, CRenderTarget* ptextarg, u32& dwFilterOpts, int exactcolor, int context)
{
    if (ptextarg->IsDepth())
    {
        SetWriteDepth();
    }

    GLuint ptexclut = 0;
    //int psm = PIXEL_STORAGE_FORMAT(curvb.tex0);
    const int shadertype = FlushGetShaderType(curvb, ptextarg, ptexclut);

    FRAGMENTSHADER* const pfragment = ZZshLoadShadeEffect(
        shadertype, 0, curvb.curprim.fge,
        IsAlphaTestExpansion(curvb.tex0), exactcolor, curvb.clamp, context, NULL);

    FlushSetPageOffset(pfragment, shadertype, ptextarg);
    FlushSetTexOffset(pfragment, shadertype, curvb, ptextarg);
    FlushTextureDims(pfragment, shadertype, curvb, ptextarg);

    const bool haveClutSampler = (pfragment->sCLUT != NULL && ptexclut != 0);
    if (haveClutSampler)
    {
        ZZshGLSetTextureParameter(pfragment->prog, pfragment->sCLUT, ptexclut, "CLUT");
    }

    FlushApplyResizeFilter(curvb, dwFilterOpts, ptextarg, context);

    if (g_bSaveTex)
    {
        SaveTexture("tex.tga", GL_TEXTURE_RECTANGLE_NV,
                    ptextarg == curvb.prndr ? ptextarg->ptexFeedback : ptextarg->ptex,
                    RW(ptextarg->fbw), RH(ptextarg->fbh));
    }

    return pfragment;
}
// Load the regular (memory texture) fragment shader for the current texture.
// When g_bSaveTex is set the texture is also dumped for debugging.
// Returns the loaded shader; a NULL result is logged and returned as is.
inline FRAGMENTSHADER* FlushMadeNewTarget(VB& curvb, int exactcolor, int context)
{
    // save the texture
    // FIXME: I suspect one of g_bSaveTex test variable is wrong
    if (g_bSaveTex == 1)
    {
        SaveTex(&curvb.tex0, 1);
        /*CMemoryTarget* pmemtarg = */
        g_MemTargs.GetMemoryTarget(curvb.tex0, 0);
    }
    else if (g_bSaveTex)
    {
        SaveTex(&curvb.tex0, 0);
    }

    FRAGMENTSHADER* const pfragment = ZZshLoadShadeEffect(
        0, GetTexFilter(curvb.tex1), curvb.curprim.fge,
        IsAlphaTestExpansion(curvb.tex0), exactcolor, curvb.clamp, context, NULL);

    if (pfragment == NULL)
    {
        ZZLog::Error_Log("Could not find memory target shader.");
    }

    return pfragment;
}
// With the shader chosen, set the variables common to all textured draws: the
// texture variables, the texture sampler (memory or target), the two clamp
// textures when present, and the memory texture recorded for this context.
inline void FlushSetTexture(VB& curvb, FRAGMENTSHADER* pfragment, CRenderTarget* ptextarg, int context)
{
    SetTexVariables(context, pfragment);
    SetTexInt(context, pfragment, ptextarg == NULL);

    // have to enable the texture parameters(curtest.atst)
    if (curvb.ptexClamp[0] != 0)
    {
        ZZshGLSetTextureParameter(pfragment->prog, pfragment->sBitwiseANDX, curvb.ptexClamp[0], "Clamp 0");
    }

    if (curvb.ptexClamp[1] != 0)
    {
        ZZshGLSetTextureParameter(pfragment->prog, pfragment->sBitwiseANDY, curvb.ptexClamp[1], "Clamp 1");
    }

    // FIXME condition is a bit strange for GLSL
#ifdef GLSL4_API
    const bool bindMemory = (s_ptexCurSet[context] != 0);
#else
    const bool bindMemory = (pfragment->sMemory != NULL && s_ptexCurSet[context] != 0);
#endif

    if (bindMemory)
    {
        ZZshGLSetTextureParameter(pfragment->prog, pfragment->sMemory, s_ptexCurSet[context], "Clamp memory");
    }
}
// Mark the context's texture constants and texture variables as out of sync
// and make the given fragment shader the active pixel shader.
inline void FlushBindProgram(FRAGMENTSHADER* pfragment, int context)
{
    VB& ctxvb = vb[context];
    ctxvb.bTexConstsSync = 0;
    ctxvb.bVarsTexSync = 0;

    ZZshSetPixelShader(pfragment->prog);
}
// Select and bind the shaders for this draw and return the fragment shader.
//   Untextured: one of the ppsRegular shaders, indexed by fog and depth write.
//   Textured:   the shader for sampling an existing target when ptextarg is
//               given, otherwise the regular memory-texture shader.
// NOTE(review): when no shader is found in the textured path the error is
// only logged; pfragment is still dereferenced afterwards. Confirm whether
// the shader loader can actually return NULL here.
inline FRAGMENTSHADER* FlushRendererStage(VB& curvb, u32& dwFilterOpts, CRenderTarget* ptextarg, int exactcolor, int context)
{
    FRAGMENTSHADER* pfragment = NULL;

    // set the correct pixel shaders
    if (!curvb.curprim.tme)
    {
        pfragment = &ppsRegular[curvb.curprim.fge + 2 * s_bWriteDepth];
    }
    else
    {
        pfragment = (ptextarg != NULL)
            ? FlushUseExistRenderTarget(curvb, ptextarg, dwFilterOpts, exactcolor, context)
            : FlushMadeNewTarget(curvb, exactcolor, context);

        if (pfragment == NULL)
        {
            ZZLog::Error_Log("Shader is not found.");
            // return NULL;
        }

        FlushSetTexture(curvb, pfragment, ptextarg, context);
    }

    GL_REPORT_ERRORD();

    // set the shaders
    SetShaderCaller("FlushRendererStage");
    ZZshSetVertexShader(pvs[2 * ((curvb.curprim._val >> 1) & 3) + 8 * s_bWriteDepth + context]);
    FlushBindProgram(pfragment, context);

    GL_REPORT_ERRORD();
    return pfragment;
}
// Stencil rendering is possible only when stencil updates are enabled, the
// target does not have PSMT_BITMODE == 1, the high byte of the frame mask is
// clear and the no_stencil setting is off.
inline bool AlphaCanRenderStencil(VB& curvb)
{
    if (!g_bUpdateStencil) return false;
    if (!(PSMT_BITMODE(curvb.prndr->psm) != 1)) return false;
    if (ZZOglGet_fbmHighByte(curvb.frame.fbm)) return false;

    return !(conf.settings().no_stencil);
}
// Enable the stencil test in a neutral state (always pass, keep values) when
// the destination alpha test is active and stencil rendering is possible;
// otherwise disable the stencil test.
inline void AlphaSetStencil()
{
    const bool useStencil = s_bDestAlphaTest && bCanRenderStencil;

    if (!useStencil)
    {
        glDisable(GL_STENCIL_TEST);
        return;
    }

    glEnable(GL_STENCIL_TEST);
    GL_STENCILFUNC(GL_ALWAYS, 0, 0);
    glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP);
}
//inline u32 FtoDW(float f) { return (*((u32*)&f)); }
// Configure depth writes and the depth test for this draw. Depth writes are
// enabled only when the Z buffer is not masked and the depth test is on.
// When depth is written through a second render target (s_bWriteDepth), that
// target is bound to slot 1, or slot 1 is reset when Z is masked.
// pfragment is not used.
inline void AlphaSetDepthTest(VB& curvb, const pixTest curtest, FRAGMENTSHADER* pfragment)
{
    const bool depthWritable = !curvb.zbuf.zmsk && curtest.zte;
    glDepthMask(depthWritable);
    // && curtest.zte && (curtest.ztst > 1) );

    if (curtest.zte)
    {
        if (curtest.ztst > 1)
        {
            g_nDepthUsed = 2;
        }
#if 0
        if ((curtest.ztst == 2) ^(g_nDepthBias != 0))
        {
            g_nDepthBias = (curtest.ztst == 2);
            //SETRS(D3DRS_DEPTHBIAS, g_nDepthBias?FtoDW(0.0003f):FtoDW(0.000015f));
        }
#endif
        glDepthFunc(g_dwZCmp[curtest.ztst]);
    }

    GL_ZTEST(curtest.zte);

    if (!s_bWriteDepth) return;

    if (curvb.zbuf.zmsk)
        ResetRenderTarget(1);
    else
        curvb.pdepth->SetRenderTarget(1);
}
// Set up blending for this draw and return the primitive's original abe flag.
// With gs.pabe set, blending is forced on and curprim.abe is overwritten with
// 1, which is why the previous value is handed back to the caller.
inline u32 AlphaSetupBlendTest(VB& curvb)
{
    if (!curvb.curprim.abe)
        glDisable(GL_BLEND);
    else
        SetAlphaVariables(curvb.alpha);

    const u32 previousAbe = curvb.curprim.abe;

    if (gs.pabe)
    {
        //ZZLog::Error_Log("PABE!");
        curvb.curprim.abe = 1;
        glEnable(GL_BLEND);
    }

    return previousAbe;
}
// Run RenderFBA when any of these holds: gs.pabe is set, fba is set while the
// high byte of the frame mask is clear, or the destination alpha test is
// active with stencil rendering available.
// Must be called before RenderAlphaTest.
inline void AlphaRenderFBA(VB& curvb, FRAGMENTSHADER* pfragment)
{
    const bool fbaWritable = curvb.fba.fba && !ZZOglGet_fbmHighByte(curvb.frame.fbm);
    const bool stencilDestAlpha = s_bDestAlphaTest && bCanRenderStencil;

    if (gs.pabe || fbaWritable || stencilDestAlpha)
    {
        RenderFBA(curvb, pfragment);
    }
}
// Upload the g_fOneColor shader constant and, when blending needs it, run the
// special alpha-test stencil pass.
// Returns 1 when that special pass was rendered, 0 otherwise.
inline u32 AlphaRenderAlpha(VB& curvb, const pixTest curtest, FRAGMENTSHADER* pfragment, int exactcolor)
{
    SetShaderCaller("AlphaRenderAlpha");

    if (!curvb.curprim.abe)
    {
        // not using blending so set to defaults
        float4 v = exactcolor ? float4(1, 510 * 255.0f / 256.0f, 0, 0) : float4(1, 2 * 255.0f / 256.0f, 0, 0);
        ZZshSetParameter4fv(pfragment->prog, pfragment->sOneColor, v, "g_fOneColor");
        return 0;
    }

    u32 usedSpecialPass = 0;

    const bool alphaTestAbove80 = (curtest.ate && curtest.atst > ATST_ALWAYS) && (curtest.aref > 0x80);
    if (bNeedBlendFactorInAlpha || alphaTestAbove80)
    {
        // need special stencil processing for the alpha
        RenderAlphaTest(curvb, pfragment);
        usedSpecialPass = 1;
    }

    // harvest fishing
    float4 v = vAlphaBlendColor;
    if (exactcolor)
    {
        v.y *= 255;
        v.w *= 255;
    }
    ZZshSetParameter4fv(pfragment->prog, pfragment->sOneColor, v, "g_fOneColor");

    return usedSpecialPass;
}
// Prepare the stencil state for the main draw.
// With destination alpha testing and stencil support, RenderStencil() does
// the work. Otherwise the stencil reference/mask are reset to STENCIL_SPECIAL
// and, if the special alpha pass was rendered, the stencil test is set up to
// accept only the pixels marked by that pass.
// With gs.pabe the stencil function is finally narrowed to pixels that also
// carry STENCIL_FBA.
inline void AlphaRenderStencil(VB& curvb, u32 dwUsingSpecialTesting)
{
if (s_bDestAlphaTest && bCanRenderStencil)
{
// if not 24bit and can write to high alpha bit
RenderStencil(curvb, dwUsingSpecialTesting);
}
else
{
s_stencilref = STENCIL_SPECIAL;
s_stencilmask = STENCIL_SPECIAL;
// setup the stencil to only accept the test pixels
if (dwUsingSpecialTesting)
{
glEnable(GL_STENCIL_TEST);
glStencilMask(STENCIL_PIXELWRITE);
GL_STENCILFUNC(GL_EQUAL, STENCIL_SPECIAL | STENCIL_PIXELWRITE, STENCIL_SPECIAL);
glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
}
}
// #ifdef _DEBUG
// if (bDestAlphaColor == 1)
// {
// ZZLog::Debug_Log("Dest alpha blending! Manipulate alpha here.");
// }
//
// #endif
if (bCanRenderStencil && gs.pabe)
{
// only render the pixels with alpha values >= 0x80
GL_STENCILFUNC(GL_EQUAL, s_stencilref | STENCIL_FBA, s_stencilmask | STENCIL_FBA);
}
GL_REPORT_ERRORD();
}
// Issue the main draw, unless the alpha test is enabled with a mode that is
// not above ATST_NEVER.
// Before drawing, the "alpha more than 1" hack (introduced by Zeydlitz on
// purpose) overrides the alpha function when ate == 1, atst == ATST_ALWAYS
// and date == 1: GL_GREATER 1.0 for datm == 1, GL_LESS 1.0 otherwise.
inline void AlphaTest(VB& curvb)
{
    // ZZLog::Debug_Log("%d %d %d %d %d", curvb.test.date, curvb.test.datm, gs.texa.aem, curvb.test.ate, curvb.test.atst );
    // return;

    const bool alphaMoreThanOneHack =
        (curvb.test.ate == 1 && curvb.test.atst == ATST_ALWAYS && curvb.test.date == 1);

    if (alphaMoreThanOneHack)
    {
        if (curvb.test.datm == 1)
        {
            glAlphaFunc(GL_GREATER, 1.0f);
        }
        else
        {
            glAlphaFunc(GL_LESS, 1.0f);
            ZZLog::Debug_Log("%d %d %d", curvb.test.date, curvb.test.datm, gs.texa.aem);
        }
    }

    const bool nothingPasses = curvb.test.ate && !(curvb.test.atst > ATST_NEVER);
    if (!nothingPasses)
    {
        Draw(curvb);
    }

    GL_REPORT_ERRORD();
}
// Second pass for per-pixel alpha blending (gs.pabe): draw again with
// blending disabled and the stored stencil function, so that only the pixels
// with alpha values < 0x80 are rendered. Does nothing but the GL error check
// when gs.pabe is clear.
inline void AlphaPabe(VB& curvb, FRAGMENTSHADER* pfragment, int exactcolor)
{
    if (gs.pabe)
    {
        SetShaderCaller("AlphaPabe");

        // only render the pixels with alpha values < 0x80
        glDisable(GL_BLEND);
        GL_STENCILFUNC_SET();

        // (1, 2, 0, 0), with y scaled by 255 for exact color
        float4 oneColor;
        oneColor.x = 1;
        oneColor.y = 2;
        oneColor.z = 0;
        oneColor.w = 0;
        if (exactcolor)
        {
            oneColor.y *= 255;
        }

        ZZshSetParameter4fv(pfragment->prog, pfragment->sOneColor, oneColor, "g_fOneColor");
        Draw(curvb);

        // reset
        if (!s_stencilmask)
        {
            s_stencilfunc = GL_ALWAYS;
        }
        GL_STENCILFUNC_SET();
    }

    GL_REPORT_ERRORD();
}
// Returns true when the alpha-failure pass must be skipped.
// The first three cases are trivial: alpha test off, test that always passes,
// or a failure action of KEEP. The last case is the no_alpha_fail game
// setting, which skips the pass for color write masks below 8, and for a full
// mask (15) combined with atst == 5 and aref == 64.
inline bool AlphaFailureIgnore(const pixTest curtest)
{
    if (!curtest.ate) return true;
    if (curtest.atst == ATST_ALWAYS) return true;
    if (curtest.afail == AFAIL_KEEP) return true;

    if (conf.settings().no_alpha_fail)
    {
        if (s_dwColorWrite < 8) return true;
        if (s_dwColorWrite == 15 && curtest.atst == 5 && (curtest.aref == 64)) return true;
    }

    // old and seemingly incorrect code.
    // if ((s_dwColorWrite < 8 && s_dwColorWrite !=8) && curtest.afail == AFAIL_FB_ONLY)
    // return true;
    // if ((s_dwColorWrite == 0xf) && curtest.atst == ATST_GEQUAL && curtest.afail == AFAIL_FB_ONLY && !(conf.settings() & GAME_REGETHACK))
    // return true;
    return false;
}
// Handle the pixels that FAILED the alpha test. The primitive is drawn again
// with the alpha comparison reversed, writing only to the buffers selected by
// the afail mode:
//   afail with bit 0 set (FB_ONLY / RGB_ONLY): color only, depth writes off;
//     RGB_ONLY additionally masks alpha writes.
//   otherwise (ZB_ONLY): depth only, all color writes off.
// With gs.pabe the draw is done twice, as in the main pass. Depth writes, the
// depth render target and the color mask are restored at the end.
// oldabe is the blend-enable value from before PABE forced blending on.
inline void AlphaFailureTestJob(VB& curvb, const pixTest curtest, FRAGMENTSHADER* pfragment, int exactcolor, int oldabe)
{
// Note: the case ate == 1, atst == ATST_NEVER and afail > AFAIL_KEEP is described in the documentation as a failure case.
// But it seems that either the docs are incorrect or this case has some issues.
if (AlphaFailureIgnore(curtest)) return;
#ifdef NOALFAFAIL
ZZLog::Error_Log("Alpha job here %d %d %d %d %d %d", s_dwColorWrite, curtest.atst, curtest.afail, curtest.aref, gs.pabe, s_bWriteDepth);
// return;
#endif
SetShaderCaller("AlphaFailureTestJob");
// need to reverse the test and disable some targets
glAlphaFunc(g_dwReverseAlphaCmp[curtest.atst], AlphaReferedValue(curtest.aref));
if (curtest.afail & 1) // front buffer update only
{
if (curtest.afail == AFAIL_RGB_ONLY) glColorMask(1, 1, 1, 0);// disable alpha
glDepthMask(0);
if (s_bWriteDepth) ResetRenderTarget(1);
}
else
{
// zbuffer update only
glColorMask(0, 0, 0, 0);
}
if (gs.pabe && bCanRenderStencil)
{
// only render the pixels with alpha values >= 0x80
float4 v = vAlphaBlendColor;
if (exactcolor) { v.y *= 255; v.w *= 255; }
ZZshSetParameter4fv(pfragment->prog, pfragment->sOneColor, v, "g_fOneColor");
glEnable(GL_BLEND);
GL_STENCILFUNC(GL_EQUAL, s_stencilref | STENCIL_FBA, s_stencilmask | STENCIL_FBA);
}
Draw(curvb);
GL_REPORT_ERRORD();
if (gs.pabe)
{
// only render the pixels with alpha values < 0x80
glDisable(GL_BLEND);
GL_STENCILFUNC_SET();
float4 v;
v.x = 1;
v.y = 2;
v.z = 0;
v.w = 0;
if (exactcolor) v.y *= 255;
ZZshSetParameter4fv(pfragment->prog, pfragment->sOneColor, v, "g_fOneColor");
Draw(curvb);
// reset
if (oldabe) glEnable(GL_BLEND);
if (!s_stencilmask) s_stencilfunc = GL_ALWAYS;
GL_STENCILFUNC_SET();
}
// restore depth writes (and the depth render target) disabled above
if ((curtest.afail & 1) && !curvb.zbuf.zmsk)
{
glDepthMask(1);
if (s_bWriteDepth)
{
assert(curvb.pdepth != NULL);
curvb.pdepth->SetRenderTarget(1);
}
}
GL_COLORMASK(s_dwColorWrite);
// not needed anymore since rest of ops concentrate on image processing
GL_REPORT_ERRORD();
}
// When the special alpha-test pass was used, render the real alpha: the
// primitive is drawn once more writing only the alpha channel, with the alpha
// test and depth writes off, restricted by stencil to the pixels written
// earlier. The GL state changed here is deliberately not restored.
inline void AlphaSpecialTesting(VB& curvb, FRAGMENTSHADER* pfragment, u32 dwUsingSpecialTesting, int exactcolor)
{
    if (dwUsingSpecialTesting)
    {
        SetShaderCaller("AlphaSpecialTesting");

        // render the real alpha
        glDisable(GL_ALPHA_TEST);
        glColorMask(0, 0, 0, 1);

        if (s_bWriteDepth) ResetRenderTarget(1);

        glDepthMask(0);

        const int writtenPixels = STENCIL_SPECIAL | STENCIL_PIXELWRITE;
        glStencilFunc(GL_EQUAL, writtenPixels, writtenPixels);
        glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP);

        const float alphaScale = exactcolor ? 510.0f : 2.0f;
        float4 v = float4(0, alphaScale, 0, 0);
        ZZshSetParameter4fv(pfragment->prog, pfragment->sOneColor, v, "g_fOneColor");

        Draw(curvb);
        // don't need to restore
    }

    GL_REPORT_ERRORD();
}
// Post-draw destination-alpha handling.
// Nothing happens unless COLORMASK_ALPHA is set in s_dwColorWrite. Otherwise:
//  - when the context's FBA flag is set, ProcessFBA is run;
//  - else, when destination alpha testing is active and the stencil can be
//    rendered, ProcessStencil is run.
//   curvb     - vertex buffer of the context being flushed.
//   pfragment - fragment shader forwarded to ProcessFBA.
inline void AlphaDestinationTest(VB& curvb, FRAGMENTSHADER* pfragment)
{
	const bool alphaWritable = (s_dwColorWrite & COLORMASK_ALPHA) != 0;

	if (!alphaWritable) return;

	if (curvb.fba.fba)
	{
		ProcessFBA(curvb, pfragment);
		return;
	}

	// finally make sure all entries are 1 when the dest alpha >= 0x80 (if fba is 1, this is already the case)
	if (s_bDestAlphaTest && bCanRenderStencil) ProcessStencil(curvb);

	// if (bDestAlphaColor == 1)
	// {
	// // need to reset the dest colors to their original counter parts
	// //ZZLog::Warn_Log("Need to reset dest alpha color");
	// }
}
// Placeholder for dumping the current render target to an image file.
// The only live statement is an early return under _DEBUG; the frame-saving
// code that follows it is commented out, so the function has no effect in any
// build configuration. curvb is referenced only by the commented-out code.
inline void AlphaSaveTarget(VB& curvb)
{
#ifdef _DEBUG
return; // Do nothing
//#ifdef _WIN32
// CreateDirectory("frames", NULL);
//#else
// char* strdir="";
// sprintf(strdir, "mkdir %s", "frames");
// system(strdir);
//#endif
// char str[255];
// Needs a # after frame to work properly.
// sprintf(str, "frames/frame.tga");
// //FB::Unbind(); // switch to the backbuffer
// //glFlush();
// //SaveTexture("tex.jpg", GL_TEXTURE_RECTANGLE_NV, curvb.prndr->ptex, RW(curvb.prndr->fbw), RH(curvb.prndr->fbh));
// SaveRenderTarget(str, RW(curvb.prndr->fbw), RH(curvb.prndr->fbh), 0);
#endif
}
// Puts GL_LINEAR filtering back on the GL_TEXTURE_RECTANGLE_NV target.
//   dwFilterOpts - bit 0: restore the magnification filter,
//                  bit 1: restore the minification filter,
//                  zero:  nothing to undo.
inline void FlushUndoFiter(u32 dwFilterOpts)
{
	// undo filter changes (binding didn't change)
	const bool restoreMag = (dwFilterOpts & 1) != 0;
	const bool restoreMin = (dwFilterOpts & 2) != 0;

	if (restoreMag)
	{
		glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
	}

	if (restoreMin)
	{
		glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
	}
}
// This is the most important function! It draws all collected info onscreen.
//
// Flushes the primitives queued in vb[context]. The helpers are called in a
// fixed order: early-out check, target lookup, per-context setup, shader
// selection, stencil/depth/alpha/blend state setup, the draw passes, and
// finally cleanup (texture filter undo, vertex count reset, abe restore).
//   context - index into vb[] selecting the context to flush.
void Flush(int context)
{
FUNCLOG
VB& curvb = vb[context];
// by-value copy of the context's test settings; this copy is what the helpers below receive
const pixTest curtest = curvb.test;
#ifdef GLSL4_API
g_cs.set_context(context);
#endif
// a true result from FlushInitialTest aborts the flush before any drawing
if (FlushInitialTest(curvb, curtest, context)) return;
VisualBufferMessage(context);
GL_REPORT_ERRORD();
CRenderTarget* ptextarg = FlushGetTarget(curvb);
SwitchWireframeOff();
FlushDoContextJob(curvb, context);
u32 dwUsingSpecialTesting = 0, dwFilterOpts = 0;
int exactcolor = FlushGetExactcolor(curtest);
// dwFilterOpts is passed by name here and consumed by FlushUndoFiter at the end
FRAGMENTSHADER* pfragment = FlushRendererStage(curvb, dwFilterOpts, ptextarg, exactcolor, context);
bCanRenderStencil = AlphaCanRenderStencil(curvb);
if (curtest.date || gs.pabe) SetDestAlphaTest();
AlphaSetStencil();
AlphaSetDepthTest(curvb, curtest, pfragment); // Error!
SetAlphaTest(curtest);
// oldabe is written back to curvb.curprim.abe once drawing is finished
u32 oldabe = AlphaSetupBlendTest(curvb); // Unavoidable
// needs to be before RenderAlphaTest
AlphaRenderFBA(curvb, pfragment);
dwUsingSpecialTesting = AlphaRenderAlpha(curvb, curtest, pfragment, exactcolor); // Unavoidable
AlphaRenderStencil(curvb, dwUsingSpecialTesting);
AlphaTest(curvb); // Unavoidable
AlphaPabe(curvb, pfragment, exactcolor);
AlphaFailureTestJob(curvb, curtest, pfragment, exactcolor, oldabe);
AlphaSpecialTesting(curvb, pfragment, dwUsingSpecialTesting, exactcolor);
AlphaDestinationTest(curvb, pfragment);
AlphaSaveTarget(curvb);
GL_REPORT_ERRORD();
FlushUndoFiter(dwFilterOpts);
// NOTE(review): ppf is declared outside this chunk; presumably a per-frame statistics counter -- confirm
ppf += curvb.nCount + 0x100000;
// the queued vertices have been consumed
curvb.nCount = 0;
curvb.curprim.abe = oldabe;
SwitchWireframeOn();
GL_REPORT_ERRORD();
}
// Flushes context 0 and then context 1.
void FlushBoth()
{
	for (int context = 0; context < 2; ++context)
	{
		Flush(context);
	}
}
// Called from many places.
// Flushes every context (0 first, then 1) whose render target or depth target
// is the object ptr points to.
void FlushIfNecesary(void* ptr)
{
	for (int context = 0; context < 2; ++context)
	{
		const bool targetInUse = (vb[context].prndr == ptr) || (vb[context].pdepth == ptr);

		if (targetInUse) Flush(context);
	}
}
// Stencil-only pass that writes STENCIL_FBA for the pixels drawn by curvb.
// Color and depth writes are disabled and the depth test is turned off, the
// stencil function is GL_ALWAYS with reference STENCIL_FBA, and the alpha test
// is GL_GEQUAL against 1. Afterwards the alpha test, color mask, stencil op,
// depth mask/target and depth test are put back from curvb and s_dwColorWrite.
//   curvb     - vertex buffer to draw (also the source of the restored state).
//   pfragment - fragment shader receiving the g_fOneColor constant (1,2,0,0).
inline void RenderFBA(const VB& curvb, FRAGMENTSHADER* pfragment)
{
// add fba to all pixels
GL_STENCILFUNC(GL_ALWAYS, STENCIL_FBA, 0xff);
glStencilMask(STENCIL_CLEAR);
glStencilOp(GL_ZERO, GL_KEEP, GL_REPLACE);
// only the stencil buffer may change during this pass
glDisable(GL_DEPTH_TEST);
glDepthMask(0);
glColorMask(0, 0, 0, 0);
if (s_bWriteDepth) ResetRenderTarget(1);
SetShaderCaller("RenderFBA");
glEnable(GL_ALPHA_TEST);
glAlphaFunc(GL_GEQUAL, 1);
float4 v(1,2,0,0);
ZZshSetParameter4fv(pfragment->prog, pfragment->sOneColor, v, "g_fOneColor");
Draw(curvb);
// put the context's own alpha test back
SetAlphaTest(curvb.test);
// reset (not necessary)
GL_COLORMASK(s_dwColorWrite);
glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
// depth writes are re-enabled only when the z buffer is not masked
if (!curvb.zbuf.zmsk)
{
glDepthMask(1);
assert(curvb.pdepth != NULL);
if (s_bWriteDepth) curvb.pdepth->SetRenderTarget(1);
}
GL_ZTEST(curvb.test.zte);
}
// Stencil-only pre-pass that marks pixels with STENCIL_SPECIAL.
// Does nothing when g_bUpdateStencil is false. Otherwise color and depth
// writes are disabled and curvb is drawn up to two times:
//  1. only when destination alpha testing is off: with the alpha test disabled
//     and stencil mask STENCIL_CLEAR, writing reference 0 (clears the stencil
//     where the primitive covers);
//  2. always: with stencil mask/reference STENCIL_SPECIAL and the depth test
//     disabled.
// Color mask, depth mask and depth target are restored at the end.
//   curvb     - vertex buffer to draw; its test/zbuf fields drive the state.
//   pfragment - fragment shader receiving the g_fOneColor constant.
__forceinline void RenderAlphaTest(const VB& curvb, FRAGMENTSHADER* pfragment )
{
if (!g_bUpdateStencil) return;
if ((curvb.test.ate) && (curvb.test.afail == AFAIL_FB_ONLY)) glDisable(GL_ALPHA_TEST);
glDepthMask(0);
glColorMask(0, 0, 0, 0);
if (s_bWriteDepth) ResetRenderTarget(1);
SetShaderCaller("RenderAlphaTest");
float4 v(1,2,0,0);
ZZshSetParameter4fv(pfragment->prog, pfragment->sOneColor, v, "g_fOneColor");
// or a 1 to the stencil buffer wherever alpha passes
glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
s_stencilfunc = GL_ALWAYS;
glEnable(GL_STENCIL_TEST);
if (!s_bDestAlphaTest)
{
// clear everything
s_stencilref = 0;
glStencilMask(STENCIL_CLEAR);
glDisable(GL_ALPHA_TEST);
GL_STENCILFUNC_SET();
Draw(curvb);
// re-enable the alpha test unless it was switched off above for AFAIL_FB_ONLY
if (curvb.test.ate && curvb.test.afail != AFAIL_FB_ONLY && USEALPHATESTING) glEnable(GL_ALPHA_TEST);
}
// for alpha references above 0x80 a different g_fOneColor is used and the context's own alpha function is set
if (curvb.test.ate && curvb.test.atst > ATST_ALWAYS && curvb.test.aref > 0x80)
{
v = float4(1,1,0,0);
ZZshSetParameter4fv(pfragment->prog, pfragment->sOneColor, v, "g_fOneColor");
glAlphaFunc(g_dwAlphaCmp[curvb.test.atst], AlphaReferedValue(curvb.test.aref));
}
s_stencilref = STENCIL_SPECIAL;
glStencilMask(STENCIL_SPECIAL);
GL_STENCILFUNC_SET();
glDisable(GL_DEPTH_TEST);
Draw(curvb);
if (curvb.test.zte) glEnable(GL_DEPTH_TEST);
GL_ALPHATEST(0);
GL_COLORMASK(s_dwColorWrite);
if (!curvb.zbuf.zmsk)
{
glDepthMask(1);
// set rt next level
// NOTE(review): pdepth is dereferenced without the NULL assert used in RenderFBA/ProcessStencil
if (s_bWriteDepth) curvb.pdepth->SetRenderTarget(1);
}
}
// Configures the stencil state used for the main draw.
// Stencil writes are limited to STENCIL_PIXELWRITE and replace on pass. The
// global compare mask/function/reference (s_stencilmask, s_stencilfunc,
// s_stencilref) are rebuilt from the destination-alpha-test settings of curvb
// and from dwUsingSpecialTesting, then applied with GL_STENCILFUNC_SET().
//
// NOTE: This stencil hack for dest alpha testing ONLY works when
// the geometry in one DrawPrimitive call does not overlap
inline void RenderStencil(const VB& curvb, u32 dwUsingSpecialTesting)
{
	// mark the stencil buffer for the new data's bits (mark 4 if alpha is >= 0xff)
	// mark 4 if a pixel was written (so that the stencil buf can be changed with new values)
	glStencilMask(STENCIL_PIXELWRITE);
	glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);

	const bool specialTesting = (dwUsingSpecialTesting != 0);

	s_stencilmask = (curvb.test.date ? STENCIL_ALPHABIT : 0) | (specialTesting ? STENCIL_SPECIAL : 0);

	// with no bits to compare, every pixel passes
	if (s_stencilmask)
	{
		s_stencilfunc = GL_EQUAL;
	}
	else
	{
		s_stencilfunc = GL_ALWAYS;
	}

	s_stencilref = (curvb.test.date * curvb.test.datm) | STENCIL_PIXELWRITE | (specialTesting ? STENCIL_SPECIAL : 0);

	GL_STENCILFUNC_SET();
}
// Post-draw stencil fix-up for contexts whose FBA flag is clear (asserted).
// With color/depth writes and the depth test disabled and the ppsOne pixel
// shader bound, curvb is drawn three times:
//  1. stencil mask STENCIL_ALPHABIT, test EQUAL to STENCIL_PIXELWRITE under
//     mask STENCIL_PIXELWRITE | STENCIL_FBA;
//  2. same write mask, reference additionally carrying STENCIL_FBA and
//     STENCIL_ALPHABIT;
//  3. stencil mask STENCIL_CLEAR with GL_ALWAYS and reference 0.
// Color mask, alpha test, depth mask/target, depth test and stencil op are
// then restored.
//   curvb - vertex buffer to draw; also the source of the restored state.
inline void ProcessStencil(const VB& curvb)
{
assert(!curvb.fba.fba);
// set new alpha bit
glStencilMask(STENCIL_ALPHABIT);
GL_STENCILFUNC(GL_EQUAL, STENCIL_PIXELWRITE, STENCIL_PIXELWRITE | STENCIL_FBA);
glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
// only the stencil buffer may change during these passes
glDisable(GL_DEPTH_TEST);
glDepthMask(0);
glColorMask(0, 0, 0, 0);
if (s_bWriteDepth) ResetRenderTarget(1);
GL_ALPHATEST(0);
SetShaderCaller("ProcessStencil");
ZZshSetPixelShader(ppsOne.prog);
Draw(curvb);
// process when alpha >= 0xff
GL_STENCILFUNC(GL_EQUAL, STENCIL_PIXELWRITE | STENCIL_FBA | STENCIL_ALPHABIT, STENCIL_PIXELWRITE | STENCIL_FBA);
Draw(curvb);
// clear STENCIL_PIXELWRITE bit
glStencilMask(STENCIL_CLEAR);
GL_STENCILFUNC(GL_ALWAYS, 0, STENCIL_PIXELWRITE | STENCIL_FBA);
Draw(curvb);
// restore state
GL_COLORMASK(s_dwColorWrite);
if (curvb.test.ate && USEALPHATESTING) glEnable(GL_ALPHA_TEST);
// depth writes are re-enabled only when the z buffer is not masked
if (!curvb.zbuf.zmsk)
{
glDepthMask(1);
if (s_bWriteDepth)
{
assert(curvb.pdepth != NULL);
curvb.pdepth->SetRenderTarget(1);
}
}
GL_ZTEST(curvb.test.zte);
glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP);
}
// Post-draw FBA fix-up: an additive alpha-only pass followed by a stencil
// reset pass.
// Returns immediately when bit 31 of the frame mask (fbm) is set. Otherwise:
//  1. draws curvb with only the alpha channel writable, alpha blending set to
//     GL_ONE/GL_ONE with GL_FUNC_ADD, alpha test GL_LEQUAL 1, and the stencil
//     test limited to pixels matching STENCIL_FBA | STENCIL_PIXELWRITE;
//  2. draws curvb again with stencil mask STENCIL_CLEAR and op GL_ZERO on pass.
// The alpha test, alpha blend function/equation, color mask, depth mask/target
// and depth test are restored afterwards.
//   curvb     - vertex buffer to draw; also the source of the restored state.
//   pfragment - fragment shader receiving the g_fOneColor constant (1,1,0,0).
__forceinline void ProcessFBA(const VB& curvb, FRAGMENTSHADER* pfragment )
{
if ((curvb.frame.fbm&0x80000000)) return;
// add fba to all pixels that were written and alpha was less than 0xff
glStencilMask(STENCIL_ALPHABIT);
glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
GL_STENCILFUNC(GL_EQUAL, STENCIL_FBA | STENCIL_PIXELWRITE | STENCIL_ALPHABIT, STENCIL_PIXELWRITE | STENCIL_FBA);
glDisable(GL_DEPTH_TEST);
glDepthMask(0);
// only the alpha channel is written by the first pass
glColorMask(0, 0, 0, 1);
if (s_bWriteDepth) ResetRenderTarget(1);
SetShaderCaller("ProcessFBA");
// processes the pixels with ALPHA < 0x80*2
glEnable(GL_ALPHA_TEST);
glAlphaFunc(GL_LEQUAL, 1);
// add 1 to dest
GL_BLEND_ALPHA(GL_ONE, GL_ONE);
GL_BLENDEQ_ALPHA(GL_FUNC_ADD);
// FIXME: Seem dangerous
// float f = 1;
// ZZshSetParameter4fv(pfragment->prog, pfragment->sOneColor, &f, "g_fOneColor");
// NOTE(review): the constant is set on pfragment->prog, but ppsOne.prog is the shader bound for the draw -- confirm this is intended
float4 v = float4(1,1,0,0);
ZZshSetParameter4fv(pfragment->prog, pfragment->sOneColor, v, "g_fOneColor");
ZZshSetPixelShader(ppsOne.prog);
Draw(curvb);
glDisable(GL_ALPHA_TEST);
// reset bits
glStencilMask(STENCIL_CLEAR);
GL_STENCILFUNC(GL_GREATER, 0, STENCIL_PIXELWRITE | STENCIL_FBA);
glStencilOp(GL_KEEP, GL_KEEP, GL_ZERO);
Draw(curvb);
// NOTE(review): unlike ProcessStencil, this restore does not look at curvb.test.ate -- confirm
if ((curvb.test.atst != ATST_NEVER) && USEALPHATESTING)
{
glEnable(GL_ALPHA_TEST);
glAlphaFunc(g_dwAlphaCmp[curvb.test.atst], AlphaReferedValue(curvb.test.aref));
}
// restore (SetAlphaVariables)
GL_BLEND_ALPHA(GL_ONE, GL_ZERO);
if (vAlphaBlendColor.y < 0) GL_BLENDEQ_ALPHA(GL_FUNC_REVERSE_SUBTRACT);
// reset (not necessary)
GL_COLORMASK(s_dwColorWrite);
if (!curvb.zbuf.zmsk)
{
glDepthMask(1);
// NOTE(review): pdepth is dereferenced without the NULL assert used in RenderFBA/ProcessStencil
if (s_bWriteDepth) curvb.pdepth->SetRenderTarget(1);
}
GL_ZTEST(curvb.test.zte);
}
// Makes sure the given GS context has a color target (prndr) and a depth
// target (pdepth), brings both up to date and binds them for rendering.
//
// Steps, in order:
//  1. Fetch the color target from s_RTs when the context has none.
//  2. Fetch the depth target from s_DepthRTs when the context has none; its
//     height is taken from the color target.
//  3. For a virtual depth target: promote it when z writes are enabled,
//     otherwise clear its TS_NeedUpdate flag.
//  4. Update the depth target if flagged TS_NeedUpdate.
//  5. Update the color target if flagged TS_NeedUpdate; otherwise bind the
//     color / depth-stencil surfaces and the viewport directly.
//  6. Apply frame pointer and scissor via SetTarget, and mark the context's
//     target variables as set (invalidating the other context's flag when it
//     renders to a different target).
//
//   context - index into vb[]; the other context is addressed as vb[!context].
void SetContextTarget(int context)
{
	FUNCLOG
	VB& curvb = vb[context];
	GL_REPORT_ERRORD();

	if (curvb.prndr == NULL)
		curvb.prndr = s_RTs.GetTarg(curvb.frame, 0, get_maxheight(curvb.gsfb.fbp, curvb.gsfb.fbw, curvb.gsfb.psm));

	// The depth-target setup below reads prndr->fbh, so validate the color
	// target before it is dereferenced rather than after.
	assert(curvb.prndr != NULL);

	// make sure targets are valid
	if (curvb.pdepth == NULL)
	{
		frameInfo f;
		f.fbp = curvb.zbuf.zbp;
		f.fbw = curvb.frame.fbw;
		f.fbh = curvb.prndr->fbh;
		f.psm = curvb.zbuf.psm;
		f.fbm = 0;
		curvb.pdepth = (CDepthTarget*)s_DepthRTs.GetTarg(f, CRenderTargetMngr::TO_DepthBuffer | CRenderTargetMngr::TO_StrictHeight |
					   (curvb.zbuf.zmsk ? CRenderTargetMngr::TO_Virtual : 0), get_maxheight(curvb.zbuf.zbp, curvb.gsfb.fbw, 0));
	}

	assert(curvb.prndr != NULL && curvb.pdepth != NULL);

	if (curvb.pdepth->fbh != curvb.prndr->fbh) ZZLog::Debug_Log("(curvb.pdepth->fbh(0x%x) != curvb.prndr->fbh(0x%x))", curvb.pdepth->fbh, curvb.prndr->fbh);

	//assert(curvb.pdepth->fbh == curvb.prndr->fbh);

	if (curvb.pdepth->status & CRenderTarget::TS_Virtual)
	{
		if (!curvb.zbuf.zmsk)
		{
			// z writes are enabled, so the virtual target has to become a real one
			CRenderTarget* ptemp = s_DepthRTs.Promote(curvb.pdepth->fbp | (curvb.pdepth->fbw << 16));
			assert(ptemp == curvb.pdepth);
			(void)ptemp; // only inspected by the assert; keeps release builds warning-free
		}
		else
		{
			curvb.pdepth->status &= ~CRenderTarget::TS_NeedUpdate;
		}
	}

	//bool bSetTarg = 1;
	if (curvb.pdepth->status & CRenderTarget::TS_NeedUpdate)
	{
		assert(!(curvb.pdepth->status & CRenderTarget::TS_Virtual));

		// don't update if virtual
		curvb.pdepth->Update(context, curvb.prndr);
		//bSetTarg = 0;
	}

	GL_REPORT_ERRORD();

	if (curvb.prndr->status & CRenderTarget::TS_NeedUpdate)
	{
		/* if(bSetTarg) {
		 * ZZLog::Debug_Log( " Here ");
		 * if(s_bWriteDepth) {
		 * curvb.pdepth->SetRenderTarget(1);
		 * curvb.pdepth->SetDepthStencilSurface();
		 * }
		 * else
		 * curvb.pdepth->SetDepthStencilSurface();
		 * }*/
		curvb.prndr->Update(context, curvb.pdepth);
	}
	else
	{
		//if( (vb[0].prndr != vb[1].prndr && vb[!context].bVarsSetTarg) || !vb[context].bVarsSetTarg )
		curvb.prndr->SetRenderTarget(0);

		//if( bSetTarg && ((vb[0].pdepth != vb[1].pdepth && vb[!context].bVarsSetTarg) || !vb[context].bVarsSetTarg) )
		curvb.pdepth->SetDepthStencilSurface();

		if (conf.mrtdepth && IsWriteDepth()) curvb.pdepth->SetRenderTarget(1);

		// the target being rendered to is dropped from the current texture set
		if (s_ptexCurSet[0] == curvb.prndr->ptex) s_ptexCurSet[0] = 0;
		if (s_ptexCurSet[1] == curvb.prndr->ptex) s_ptexCurSet[1] = 0;

		curvb.prndr->SetViewport();
	}

	curvb.prndr->SetTarget(curvb.frame.fbp, curvb.scissor, context);

	if ((curvb.zbuf.zbp - curvb.pdepth->fbp) != (curvb.frame.fbp - curvb.prndr->fbp) && curvb.test.zte)
		ZZLog::Warn_Log("Frame and zbuf not aligned.");

	curvb.bVarsSetTarg = true;

	if (vb[!context].prndr != curvb.prndr) vb[!context].bVarsSetTarg = false;

#ifdef _DEBUG
	// These conditions happen often enough that we'll just warn about it rather then abort in Debug mode.
	if (curvb.prndr->status & CRenderTarget::TS_NeedUpdate)
	{
		ZZLog::Debug_Log("In SetContextTarget, prndr is ending with TS_NeedUpdate set.");
	}

	if (curvb.pdepth != NULL && (curvb.pdepth->status & CRenderTarget::TS_NeedUpdate))
	{
		ZZLog::Debug_Log("In SetContextTarget, pdepth is ending with TS_NeedUpdate set.");
	}
#endif

	GL_REPORT_ERRORD();
}
void SetTexInt(int context, FRAGMENTSHADER* pfragment, int settexint)
{
FUNCLOG
if (settexint)
{
tex0Info& tex0 = vb[context].tex0;
if (vb[context].bVarsTexSync) {
SetTexVariablesInt(context, GetTexFilter(vb[context].tex1), tex0, true, pfragment, s_bForceTexFlush);
} else {
SetTexVariablesInt(context, GetTexFilter(vb[context].tex1), tex0, false, pfragment, s_bForceTexFlush);
INC_TEXVARS();
}
vb[context].bVarsTexSync = true;
}
else
{
vb[context].bVarsTexSync = false;
}
}
// clamp relies on texture width
//
// Uploads the texture wrap/clamp shader constants for a context.
// The horizontal mode (clamp.wms) and vertical mode (clamp.wmt) are handled by
// two parallel switches. Each fills the matching lanes of v2 (uploaded as
// g_fClampExts: x/z horizontal, y/w vertical). In region-repeat mode the lanes
// of v (uploaded as g_fTexWrapMode) are filled as well, and a bitwise texture
// is fetched into ptexClamp[] when the masked minimum differs from the value
// remembered in g_PrevBitwiseTexX / g_PrevBitwiseTexY.
//   context   - index into vb[].
//   pfragment - fragment shader whose fTexWrapMode / fClampExts parameters are
//               set (each only when its handle is non-zero).
void SetTexClamping(int context, FRAGMENTSHADER* pfragment)
{
FUNCLOG
SetShaderCaller("SetTexClamping");
clampInfo* pclamp = &vb[context].clamp;
float4 v, v2;
// NOTE(review): v.z / v.w are assigned only in the region-repeat cases; whether float4's default constructor zero-fills them is not visible here
v.x = v.y = 0;
u32* ptex = vb[context].ptexClamp;
// both clamp texture slots start cleared and are refilled only in the region-repeat cases
ptex[0] = ptex[1] = 0;
// texture width/height fields converted to float; used as divisors below
float fw = vb[context].tex0.tw ;
float fh = vb[context].tex0.th ;
switch (pclamp->wms)
{
case CLAMP_REPEAT:
// effectively unbounded range
v2.x = -1e10;
v2.z = 1e10;
break;
case CLAMP_CLAMP: // pclamp
// suikoden5 movie text
v2.x = 0;
v2.z = 1 - 0.5f / fw;
break;
case CLAMP_REGION_CLAMP: // reg pclamp
v2.x = (pclamp->minu + 0.5f) / fw;
v2.z = (pclamp->maxu - 0.5f) / fw;
break;
case CLAMP_REGION_REPEAT: // region rep x
v.x = 0.9999f;
v.z = (float)fw;
v2.x = (float)GPU_TEXMASKWIDTH / fw;
v2.z = pclamp->maxu / fw;
int correctMinu = pclamp->minu & (~pclamp->maxu); // (A && B) || C == (A && (B && !C)) + C
if (correctMinu != g_PrevBitwiseTexX)
{
g_PrevBitwiseTexX = correctMinu;
ptex[0] = s_BitwiseTextures.GetTex(correctMinu, 0);
}
break;
}
switch (pclamp->wmt)
{
case CLAMP_REPEAT:
// effectively unbounded range
v2.y = -1e10;
v2.w = 1e10;
break;
case CLAMP_CLAMP: // pclamp
// suikoden5 movie text
v2.y = 0;
v2.w = 1 - 0.5f / fh;
break;
case CLAMP_REGION_CLAMP: // reg pclamp
v2.y = (pclamp->minv + 0.5f) / fh;
v2.w = (pclamp->maxv - 0.5f) / fh;
break;
case CLAMP_REGION_REPEAT: // region rep y
v.y = 0.9999f;
v.w = (float)fh;
v2.y = (float)GPU_TEXMASKWIDTH / fh;
v2.w = pclamp->maxv / fh;
int correctMinv = pclamp->minv & (~pclamp->maxv); // (A && B) || C == (A && (B && !C)) + C
if (correctMinv != g_PrevBitwiseTexY)
{
g_PrevBitwiseTexY = correctMinv;
// the horizontal slot is passed as the second argument here
ptex[1] = s_BitwiseTextures.GetTex(correctMinv, ptex[0]);
}
break;
}
// a zero handle means the shader does not expose the parameter
if (pfragment->fTexWrapMode != 0)
ZZshSetParameter4fv(pfragment->prog, pfragment->fTexWrapMode, v, "g_fTexWrapMode");
if (pfragment->fClampExts != 0)
ZZshSetParameter4fv(pfragment->prog, pfragment->fClampExts, v2, "g_fClampExts");
}
// Zero-initialized 4x2x2x2 table with external linkage.
// NOTE(review): not referenced anywhere in the visible part of this file -- confirm it is still used before relying on it.
int CheckTexArray[4][2][2][2] = {{{{0, }}}};
// Uploads the texture-function shader constants for a context.
//
// Returns immediately when texturing is disabled for the current primitive
// (curprim.tme == 0). When the context's texture constants are out of sync
// (bTexConstsSync == false) it computes and uploads:
//   - g_fTexAlpha / g_fTexAlpha2 from the texture function (tfx), the texture
//     color component flag (tcc), the pixel format and the TEXA register;
//   - g_fTestBlack when IsAlphaTestExpansion(tex0) holds;
//   - the clamp constants via SetTexClamping;
// and then marks the constants as in sync. Finally, while s_bTexFlush is set,
// a CLUT texture triggers texClutWrite(context); any other format clears the
// flag.
//
//   context   - index into vb[].
//   pfragment - fragment shader receiving the constants.
void SetTexVariables(int context, FRAGMENTSHADER* pfragment)
{
	FUNCLOG

	if (!vb[context].curprim.tme) return;

	assert(!vb[context].bNeedTexCheck);

	tex0Info& tex0 = vb[context].tex0;

	if (!vb[context].bTexConstsSync)
	{
		SetShaderCaller("SetTexVariables");

		// alpha and texture highlighting
		float4 valpha, valpha2;

		// if clut, use the frame format
		int psm = PIXEL_STORAGE_FORMAT(tex0);

		// ZZLog::Error_Log( "A %d psm, is-clut %d. cpsm %d | %d %d", psm, PSMT_ISCLUT(psm), tex0.cpsm, tex0.tfx, tex0.tcc );

		/* tcc -- Texture Color Component 0=RGB, 1=RGBA + use Alpha from TEXA reg when not in PSM
		 * tfx -- Texture Function (0=modulate, 1=decal, 2=hilight, 3=hilight2)
		 *
		 * valpha2 = 0 0 2 1 0 0 2 1
		 * 1 0 0 0 1 1 0 0
		 * 0 0 2 0 0 1 2 0
		 * 0 0 2 0 0 1 2 0
		 *
		 * 0 1,!nNeed 1, psm=2, 10 1, psm=1
		 * valpha = 0 0 0 1 0 2 0 0 2ta0 2ta1-2ta0 0 0 2ta0 0 0 0
		 * 0 0 0 1 0 1 0 0 ta0 ta1-ta0 0 0 ta0 0 0 0
		 * 0 0 1 1 0 1 1 1 1 1 ta0 0 1 1
		 * 0 0 1 1 0 1 1 0 1 0 ta0 0 1 0
		 */
		valpha2.x = (tex0.tfx == TFX_DECAL);
		valpha2.y = (tex0.tcc == 1) && (tex0.tfx != TFX_MODULATE);
		valpha2.z = (tex0.tfx != TFX_DECAL) * 2;
		valpha2.w = (tex0.tfx == TFX_MODULATE);

		if (tex0.tcc && PSMT_ALPHAEXP(psm))
		{
			// alpha is expanded from the TEXA register; doubled for modulate
			valpha.x = (gs.texa.ta[0] / 255.0f) * (1 + (tex0.tfx == TFX_MODULATE));

			if (psm == PSMCT24)
			{
				valpha.y = 0;
			}
			else
			{
				valpha.y = ((gs.texa.ta[1] - gs.texa.ta[0]) / 255.0f) * (1 + (tex0.tfx == TFX_MODULATE));
			}
		}
		else
		{
			valpha.x = 0;
			valpha.y = (!!tex0.tcc) * (1 + (tex0.tfx == TFX_MODULATE));
		}

		valpha.z = (tex0.tfx == TFX_HIGHLIGHT2);
		valpha.w = (tex0.tcc == 0) || (tex0.tcc == 1 && tex0.tfx == TFX_HIGHLIGHT);

		ZZshSetParameter4fv(pfragment->prog, pfragment->fTexAlpha, valpha, "g_fTexAlpha");
		ZZshSetParameter4fv(pfragment->prog, pfragment->fTexAlpha2, valpha2, "g_fTexAlpha2");

		if (IsAlphaTestExpansion(tex0))
		{
			float4 vblack;
			vblack.x = vblack.y = vblack.z = vblack.w = 10;

			if (tex0.tcc && gs.texa.aem && psm == PSMCT24) vblack.w = 0;

			ZZshSetParameter4fv(pfragment->prog, pfragment->fTestBlack, vblack, "g_fTestBlack");
		}

		SetTexClamping(context, pfragment);

		vb[context].bTexConstsSync = true;
	}

	if (s_bTexFlush)
	{
		if (PSMT_ISCLUT(tex0.psm))
			texClutWrite(context);
		else
			s_bTexFlush = false;
	}
}
// Looks up the memory target backing a texture and uploads the shader
// constants that describe its layout (g_fRealTexDims, g_fTexDims,
// g_fTexBlock, g_fTexOffset, g_fPageOffset).
//   context   - index into vb[] and into s_ptexCurSet / s_ptexNextSet.
//   bilinear  - filter selector: values above 1 always enable the bilinear
//               path, 1 enables it only when conf.bilinear is set, 0 never.
//   tex0      - texture description (psm, tw, th, tbw, tbp0 are read).
//   CheckVB   - when true, return early if the memory target is the one
//               already cached in vb[context].pmemtarg.
//   pfragment - fragment shader receiving the constants.
//   force     - non-zero stores the texture in s_ptexCurSet, zero in
//               s_ptexNextSet.
// Returns early (after logging) when the memory target, its texture or
// pfragment is NULL, or when the block table has no entry for tex0.psm.
// On success the context's pmemtarg is updated and bVarsTexSync is cleared.
void SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, bool CheckVB, FRAGMENTSHADER* pfragment, int force)
{
FUNCLOG
float4 v;
CMemoryTarget* pmemtarg = g_MemTargs.GetMemoryTarget(tex0, 1);
assert( pmemtarg != NULL && pfragment != NULL && pmemtarg->ptex != NULL);
// same condition as the assert, kept as a runtime check for builds where assert is compiled out
if (pmemtarg == NULL || pfragment == NULL || pmemtarg->ptex == NULL)
{
ZZLog::Error_Log("SetTexVariablesInt error.");
return;
}
// nothing to do when the caller asked for the check and the target is unchanged
if (CheckVB && vb[context].pmemtarg == pmemtarg) return;
SetShaderCaller("SetTexVariablesInt");
float fw = (float)tex0.tw;
float fh = (float)tex0.th;
bool bUseBilinear = bilinear > 1 || (bilinear && conf.bilinear);
if (bUseBilinear)
{
// texture dimensions and their reciprocals
v.x = (float)fw;
v.y = (float)fh;
v.z = 1.0f / (float)fw;
v.w = 1.0f / (float)fh;
ZZshSetParameter4fvWithRetry(&pfragment->fRealTexDims, pfragment->prog, v, "g_fRealTexDims");
}
// a zero bpp marks a pixel format without a block table entry
if (m_Blocks[tex0.psm].bpp == 0)
{
ZZLog::Error_Log("Undefined tex psm 0x%x!", tex0.psm);
return;
}
const BLOCK& b = m_Blocks[tex0.psm];
float fbw = (float)tex0.tbw;
float4 vTexDims;
vTexDims.x = b.vTexDims.x * (fw);
vTexDims.y = b.vTexDims.y * (fh);
vTexDims.z = (float)BLOCK_TEXWIDTH * (0.002f / 64.0f + 0.01f / 128.0f);
vTexDims.w = (float)BLOCK_TEXHEIGHT * 0.1f / 512.0f;
if (bUseBilinear)
{
vTexDims.x *= 1 / 128.0f;
vTexDims.y *= 1 / 512.0f;
vTexDims.z *= 1 / 128.0f;
vTexDims.w *= 1 / 512.0f;
}
float g_fitexwidth = g_fiGPU_TEXWIDTH / (float)pmemtarg->widthmult;
//float g_texwidth = GPU_TEXWIDTH*(float)pmemtarg->widthmult;
float fpage = tex0.tbp0 * (64.0f * g_fitexwidth);// + 0.05f * g_fitexwidth;
// integer part of fpage; the fractional part goes into g_fTexOffset.w below
float fpageint = floorf(fpage);
//int starttbp = (int)fpage;
// 2048 is number of words to span one page
//float fblockstride = (2048.0f /(float)(g_texwidth*BLOCK_TEXWIDTH)) * b.vTexDims.x * fbw;
float fblockstride = (2048.0f / (float)(GPU_TEXWIDTH * (float)pmemtarg->widthmult * BLOCK_TEXWIDTH)) * b.vTexDims.x * fbw;
assert(fblockstride >= 1.0f);
v.x = (float)(2048 * g_fitexwidth);
v.y = fblockstride;
v.z = g_fBlockMult / (float)pmemtarg->widthmult;
v.w = fpage - fpageint ;
if (g_fBlockMult > 1)
{
// make sure to divide by mult (since the G16R16 texture loses info)
v.z *= b.bpp * (1 / 32.0f);
}
ZZshSetParameter4fv(pfragment->prog, pfragment->fTexDims, vTexDims, "g_fTexDims");
// ZZshSetParameter4fv(pfragment->fTexBlock, b.vTexBlock, "g_fTexBlock"); // I change it, and it's working. Seems casting from float4 to float[4] is ok.
// b is const here, so the address of the first component is passed instead of the float4 itself
ZZshSetParameter4fv(pfragment->prog, pfragment->fTexBlock, &b.vTexBlock.x, "g_fTexBlock");
ZZshSetParameter4fv(pfragment->prog, pfragment->fTexOffset, v, "g_fTexOffset");
// get hardware texture dims
//int texheight = pmemtarg->texH;
int texwidth = pmemtarg->texW;
// v is reused for the page offset constant
v.y = 1.0f;
v.x = (fpageint - (float)pmemtarg->realy / (float)pmemtarg->widthmult + 0.5f);//*v.y;
v.z = (float)texwidth;
/* if( !(g_nPixelShaderVer & SHADER_ACCURATE) || bUseBilinear ) {
if (tex0.psm == PSMT4 )
v.w = 0.0f;
else
v.w = 0.25f;
}
else
v.w = 0.5f;*/
v.w = 0.5f;
ZZshSetParameter4fv(pfragment->prog, pfragment->fPageOffset, v, "g_fPageOffset");
if (force)
s_ptexCurSet[context] = pmemtarg->ptex->tex;
else
s_ptexNextSet[context] = pmemtarg->ptex->tex;
vb[context].pmemtarg = pmemtarg;
// cleared here; SetTexInt sets it again after this call returns
vb[context].bVarsTexSync = false;
}
// Disabled legacy code, excluded from compilation by the enclosing "#if 0":
// the old SET_ALPHA_COLOR_FACTOR macro, a fragment of blend-factor code, and
// the NeedFactor helper. The template Set_Alpha_Color_Factor defined after
// this region is the live implementation.
#if 0
#define SET_ALPHA_COLOR_FACTOR(sign) \
{ \
switch(a.c) \
{ \
case 0: \
vAlphaBlendColor.y = (sign) ? 2.0f*255.0f/256.0f : -2.0f*255.0f/256.0f; \
s_srcalpha = GL_ONE; \
s_alphaeq = (sign) ? GL_FUNC_ADD : GL_FUNC_REVERSE_SUBTRACT; \
break; \
\
case 1: \
/* if in 24 bit mode, dest alpha should be one */ \
switch(PSMT_BITMODE(vb[icurctx].prndr->psm)) \
{ \
case 0: \
bDestAlphaColor = (a.d!=2)&&((a.a==a.d)||(a.b==a.d)); \
break; \
\
case 1: \
/* dest alpha should be one */ \
bDestAlphaColor = 2; \
break; \
/* default: 16bit surface, so returned alpha is ok */ \
} \
break; \
\
case 2: \
bNeedBlendFactorInAlpha = true; /* should disable alpha channel writing */ \
vAlphaBlendColor.y = 0; \
vAlphaBlendColor.w = (sign) ? (float)a.fix * (2.0f/255.0f) : (float)a.fix * (-2.0f/255.0f); \
usec = 0; /* change so that alpha comes from source*/ \
break; \
} \
}
#if 0
if( a.fix <= 0x80 ) { \
dwTemp = (a.fix*2)>255?255:(a.fix*2); \
dwTemp = dwTemp|(dwTemp<<8)|(dwTemp<<16)|0x80000000; \
ZZLog::Debug_Log("bfactor: %8.8x", dwTemp); \
glBlendColorEXT(dwTemp); \
} \
else { \
#endif
//void ResetAlphaVariables() {
// FUNCLOG
//}
inline void NeedFactor(int w)
{
if (bDestAlphaColor == 2)
{
bNeedBlendFactorInAlpha = (w + 1) ? true : false;
vAlphaBlendColor.y = 0;
vAlphaBlendColor.w = (float)w;
}
}
#endif
// Selects which alpha source feeds the blend factor and adjusts the global
// blend state accordingly.
//
// Template parameters:
//   SIGN        - true when the alpha term is used with a positive sign,
//                 false when it is negated.
//   NEED_FACTOR - true when the 24-bit destination case must also correct the
//                 blend factor stored in vAlphaBlendColor.
//
//   a - alpha register contents; a.c picks the alpha source
//       (0 = source alpha, 1 = destination alpha, 2 = the fixed value a.fix).
//
// Returns the index to use with the 4-entry blendalpha[] / blendinvalpha[]
// tables:
//   0 - source alpha (also used for a.c == 2, where the factor travels in
//       vAlphaBlendColor.w instead);
//   1 - destination alpha;
//   3 - destination alpha on a 24-bit frame buffer (the tables' dedicated
//       special-case slot).
//
// Side effects: may modify vAlphaBlendColor, s_alphaeq and
// bNeedBlendFactorInAlpha. The defaults set by the caller already match
// SIGN == true, so case 0 only touches them when SIGN is false.
template<bool SIGN, bool NEED_FACTOR>
__forceinline int Set_Alpha_Color_Factor(const alphaInfo& a)
{
	int usec = 0;

	switch(a.c)
	{
		case 0:
			usec = 0;

			/* Note: there are already default value when sign is 1. So only change them
			 * when sign is 0
			 */
			if (!SIGN) {
				vAlphaBlendColor.y = -2.0f*255.0f/256.0f;
				s_alphaeq = GL_FUNC_REVERSE_SUBTRACT;
			}
			break;

		case 1:
			usec = 1;

			/* if in 24 bit mode, dest alpha should be one */
			if(PSMT_BITMODE(vb[icurctx].prndr->psm) == 1) {
				/* dest alpha should be one */
				ZZLog::Debug_Log("Alpha on a 24 bits framebuffer, good place to check the value of blend");
				// FIXME: the array contains GL_SRC_ALPHA and GL_ONE_MINUS_SRC_ALPHA
				// It would be worth testing with GL_ONE and GL_ZERO
				//
				// Slot 3 is the 24-bit destination special case of the blend
				// tables. The tables hold only four entries (valid indices
				// 0..3), so any larger index would read past their end.
				usec = 3;

				// need a factor correction
				if (NEED_FACTOR) {
					bNeedBlendFactorInAlpha = (SIGN) ? true : false;
					vAlphaBlendColor.y = 0;
					vAlphaBlendColor.w = (SIGN) ? 1.0 : -1.0;
				}
			}
			break;

		case 2:
			usec = 0; /* change so that alpha comes from source*/
			bNeedBlendFactorInAlpha = true; /* should disable alpha channel writing */
			vAlphaBlendColor.y = 0;
			vAlphaBlendColor.w = (SIGN) ? (float)a.fix * (2.0f/255.0f) : (float)a.fix * (-2.0f/255.0f);
			break;

		default:
			// a.c has no other defined values
			assert(0);
	}

	return usec;
}
//static int CheckArray[48][2] = {{0,}};
void SetAlphaVariables(const alphaInfo& a)
{
FUNCLOG
bool alphaenable = true;
// TODO: negative color when not clamping turns to positive???
// g_vars._bAlphaState = 0; // set all to zero
bNeedBlendFactorInAlpha = false;
// b2XAlphaTest = 1;
//u32 dwTemp = 0xffffffff;
// bDestAlphaColor = 0;
// default
s_srcalpha = GL_ONE;
s_dstalpha = GL_ZERO;
s_alphaeq = GL_FUNC_ADD;
s_rgbeq = GL_FUNC_ADD;
// s_alphaInfo = a;
vAlphaBlendColor = float4(1, 2 * 255.0f / 256.0f, 0, 0);
// u32 usec = a.c;
u32 usec;
const bool POS_A = true;
const bool NEG_A = false;
const bool NEED_FACTOR = true;
/*
* Alpha table
* a + b + d
* S D
* 0 a -a 1 | 0 0 0
* 1 0 0 0 | a -a 1
* 2 0 0 0 | 0 0 0
*
* d = 0 Cs
* a b 0 Cs 1 Cd 2 0
* | |
* 0 000: a+-a+ 1 | 0+ 0+ 0 = 1 | 010: a+ 0+ 1 | 0+-a+ 0 = 1-(-a)(+)(-a) | 020: a+ 0+ 1 | 0+ 0+ 0 = 1-(-a) (+) 0
* 1 100: 0+-a+ 1 | a+ 0+ 0 = 1-a (+) a | 110: 0+ 0+ 1 | a+-a+ 0 = 1 | 120: 0+ 0+ 1 | a+ 0+ 0 = 1 (+) a
* 2 200: 0+-a+ 1 | 0+ 0+ 0 = 1-a (+) 0 | 210: 0+ 0+ 1 | 0+-a+ 0 = 1 (-) a | 220: 0+ 0+ 1 | 0+ 0+ 0 = 1
*
* d = 1 Cd
* 0 | 1 | 2
* 0 001: a+-a+ 0 | 0+ 0+ 1 = 0 (+) 1 | 011: a+ 0+ 0 | 0+-a+ 1 = a (+) 1-a | 021: a+ 0+ 0 | 0+ 0+ 1 = a (+) 1
* 1 101: 0+-a+ 0 | a+ 0+ 1 = (-a)(+) 1-(-a) | 111: 0+ 0+ 0 | a+-a+ 1 = 0 (+) 1 | 121: 0+ 0+ 0 | a+ 0+ 1 = 0 (+) 1-(-a)
* 2 201: 0+-a+ 0 | 0+ 0+ 1 = a (R-)1 | 211: 0+ 0+ 0 | 0+-a+ 1 = 0 (+) 1-a | 221: 0+ 0+ 0 | 0+ 0+ 1 = 0 (+) 1
*
* d = 2 0
* 0 | 1 | 2
* 0 002: a+-a+ 0 | 0+ 0+ 0 = 0 | 012: a+ 0+ 0 | 0+-a+ 0 = a (-) a | 022: a+ 0+ 0 | 0+ 0+ 0 = a (+) 0
* 1 102: 0+-a+ 0 | a+ 0+ 0 = a (R-) a | 112: 0+ 0+ 0 | a+-a+ 0 = 0 | 122: 0+ 0+ 0 | a+ 0+ 0 = 0 (+) a
* 2 202: 0+-a+ 0 | 0+ 0+ 0 = a (R-) 0 | 212: 0+ 0+ 0 | 0+-a+ 0 = 0 (-) a | 222: 0+ 0+ 0 | 0+ 0+ 0 = 0
*
* Formulae is: (a-b) * (c /128) + d
* 0 1 2
* a Cs Cd 0
* b Cs Cd 0
* c As Ad ALPHA.FIX
* d Cs Cd 0
*
* We want to emulate Cs * F1(alpha) + Cd * F2(alpha) by OpenGl blending: (Cs * Ss (+,-,R-) Cd * Sd)
* SET_ALPHA_COLOR_FACTOR(sign) set Set A (as As>>7, Ad>>7 or FIX>>7) with sign.
* So we could use 1+a as one_minus_alpha and -a as alpha.
*
*/
int code = (a.a * 16) + (a.b * 4) + a.d ;
// #define one_minus_alpha (bDestAlphaColor == 2) ? GL_ONE_MINUS_SRC_ALPHA : blendinvalpha[usec]
// #define alpha (bDestAlphaColor == 2) ? GL_SRC_ALPHA : blendalpha[usec]
// #define one (bDestAlphaColor == 2) ? GL_ONE : blendalpha[usec]
// #define zero (bDestAlphaColor == 2) ? GL_ZERO : blendinvalpha[usec]
#define one_minus_alpha blendinvalpha[usec]
#define alpha blendalpha[usec]
switch (code)
{
case 0: // 000 // Cs -- nothing changed
case 20: // 110 = 16+4=20 // Cs
case 40: // 220 = 32+8=40 // Cs
{
alphaenable = false;
break;
}
case 2: //002 // 0 -- should be zero
case 22: //112 // 0
case 42: //222 = 32+8+2 =42 // 0
{
s_rgbeq = GL_FUNC_ADD;
s_srcrgb = GL_ZERO;
s_dstrgb = GL_ZERO;
break;
}
case 1: //001 // Cd -- Should be destination alpha
case 21: //111, // Cd -- 0*Source + 1*Desrinarion
case 41: //221 = 32+8+1=41 // Cd --
{
s_rgbeq = GL_FUNC_ADD;
s_srcrgb = GL_ZERO;
s_dstrgb = GL_ONE;
break;
}
case 4: // 010 // (Cs-Cd)*A+Cs = Cs * (A + 1) - Cd * A
{
// bAlphaClamping = 3;
// SET_ALPHA_COLOR_FACTOR(0); // a = -A
usec = Set_Alpha_Color_Factor<NEG_A, NEED_FACTOR>(a);
s_rgbeq = GL_FUNC_ADD; // Cs*(1-a)+Cd*a
s_srcrgb = one_minus_alpha ;
s_dstrgb = alpha;
// NeedFactor(-1);
break;
}
case 5: // 011 // (Cs-Cd)*A+Cs = Cs * A + Cd * (1-A)
{
// bAlphaClamping = 3; // all testing
// SET_ALPHA_COLOR_FACTOR(1);
usec = Set_Alpha_Color_Factor<POS_A, NEED_FACTOR>(a);
s_rgbeq = GL_FUNC_ADD;
s_srcrgb = alpha;
s_dstrgb = one_minus_alpha;
// NeedFactor(1);
break;
}
case 6: //012 // (Cs-Cd)*A = Cs*A - Cd*A
{
// bAlphaClamping = 3;
// SET_ALPHA_COLOR_FACTOR(1);
usec = Set_Alpha_Color_Factor<POS_A, !NEED_FACTOR>(a);
s_rgbeq = GL_FUNC_SUBTRACT;
s_srcrgb = alpha;
s_dstrgb = alpha;
break;
}
case 8: //020 // Cs*A+Cs = Cs * (1+A)
{
// bAlphaClamping = 2; // max testing
// SET_ALPHA_COLOR_FACTOR(0); // Zeydlitz change this! a = -A
usec = Set_Alpha_Color_Factor<NEG_A, !NEED_FACTOR>(a);
s_rgbeq = GL_FUNC_ADD;
s_srcrgb = one_minus_alpha; // Cs*(1-a).
s_dstrgb = GL_ZERO;
// NeedFactor(1);
break;
}
case 9: //021 // Cs*A+Cd
{
// bAlphaClamping = 2; // max testing
// SET_ALPHA_COLOR_FACTOR(1);
usec = Set_Alpha_Color_Factor<POS_A, !NEED_FACTOR>(a);
s_rgbeq = GL_FUNC_ADD;
s_srcrgb = alpha; // ZZ change it too.
s_dstrgb = GL_ONE;
break;
}
case 10: //022 // Cs*A
{
// bAlphaClamping = 2; // max testing
// SET_ALPHA_COLOR_FACTOR(1);
usec = Set_Alpha_Color_Factor<POS_A, !NEED_FACTOR>(a);
s_rgbeq = GL_FUNC_ADD;
s_srcrgb = alpha;
s_dstrgb = GL_ZERO;
break;
}
case 16: //100 // (Cd-Cs)*A + Cs = Cd*A + Cs*(1-A)
{
// bAlphaClamping = 3;
// SET_ALPHA_COLOR_FACTOR(1);
usec = Set_Alpha_Color_Factor<POS_A, NEED_FACTOR>(a);
s_rgbeq = GL_FUNC_ADD;
s_srcrgb = one_minus_alpha;
s_dstrgb = alpha;
// NeedFactor(1);
break;
}
case 17: //101 // (Cd-Cs)*A + Cd = Cd*(A+1) - A*Cs
{
// bAlphaClamping = 3; // all testing
// SET_ALPHA_COLOR_FACTOR(0);
usec = Set_Alpha_Color_Factor<NEG_A, NEED_FACTOR>(a);
s_rgbeq = GL_FUNC_ADD;
s_srcrgb = alpha;
s_dstrgb = one_minus_alpha;
// NeedFactor(-1);
break;
}
case 18: //102 // (Cd-Cs)*A = Cd*A - Cs*A
{
// bAlphaClamping = 3;
// SET_ALPHA_COLOR_FACTOR(1);
usec = Set_Alpha_Color_Factor<POS_A, !NEED_FACTOR>(a);
s_rgbeq = GL_FUNC_REVERSE_SUBTRACT;
s_srcrgb = alpha;
s_dstrgb = alpha;
break;
}
case 24: //120 = 16+8 // Cd*A + Cs
{
// bAlphaClamping = 2; // max testing
// SET_ALPHA_COLOR_FACTOR(1);
usec = Set_Alpha_Color_Factor<POS_A, !NEED_FACTOR>(a);
s_rgbeq = GL_FUNC_ADD;
s_srcrgb = GL_ONE;
s_dstrgb = alpha;
break;
}
case 25: //121 // Cd*(1+A)
{
// bAlphaClamping = 2; // max testing
// SET_ALPHA_COLOR_FACTOR(0);
usec = Set_Alpha_Color_Factor<NEG_A, !NEED_FACTOR>(a);
s_rgbeq = GL_FUNC_ADD;
s_srcrgb = GL_ZERO;
s_dstrgb = one_minus_alpha;
// NeedFactor(-1);
break;
}
case 26: //122 // Cd*A
{
// bAlphaClamping = 2;
// SET_ALPHA_COLOR_FACTOR(1);
usec = Set_Alpha_Color_Factor<POS_A, !NEED_FACTOR>(a);
s_rgbeq = GL_FUNC_ADD;
s_srcrgb = GL_ZERO;
s_dstrgb = alpha;
break;
}
case 32: // 200 = 32 // -Cs*A + Cs = Cs*(1-A)
{
// bAlphaClamping = 1; // min testing
// SET_ALPHA_COLOR_FACTOR(1);
usec = Set_Alpha_Color_Factor<POS_A, !NEED_FACTOR>(a);
s_rgbeq = GL_FUNC_ADD;
s_srcrgb = one_minus_alpha;
s_dstrgb = GL_ZERO;
break;
}
case 33: //201 // -Cs*A + Cd
{
// bAlphaClamping = 1; // min testing
// SET_ALPHA_COLOR_FACTOR(1);
usec = Set_Alpha_Color_Factor<POS_A, !NEED_FACTOR>(a);
s_rgbeq = GL_FUNC_REVERSE_SUBTRACT;
s_srcrgb = alpha;
s_dstrgb = GL_ONE;
break;
}
case 34: //202 // -Cs*A
case 38: //212 // -Cd*A
{
// bAlphaClamping = 1; // min testing -- negative values
s_rgbeq = GL_FUNC_ADD;
s_srcrgb = GL_ZERO;
s_dstrgb = GL_ZERO;
break;
}
case 36: //210 // -Cd*A + Cs
{
// bAlphaClamping = 1; // min testing
// SET_ALPHA_COLOR_FACTOR(1);
usec = Set_Alpha_Color_Factor<POS_A, !NEED_FACTOR>(a);
s_rgbeq = GL_FUNC_SUBTRACT;
s_srcrgb = GL_ONE;
s_dstrgb = alpha;
break;
}
case 37: //211 // -Cd*A+Cd = Cd * (1-A)
{
// bAlphaClamping = 1; // min testing
// SET_ALPHA_COLOR_FACTOR(1);
usec = Set_Alpha_Color_Factor<POS_A, !NEED_FACTOR>(a);
s_rgbeq = GL_FUNC_ADD;
s_srcrgb = GL_ZERO;
s_dstrgb = one_minus_alpha;
break;
}
default:
{
ZZLog::Error_Log("Bad alpha code %d | %d %d %d", code, a.a, a.b, a.d);
}
}
/*
int t_rgbeq = GL_FUNC_ADD;
int t_srcrgb = GL_ONE;
int t_dstrgb = GL_ZERO;
int tAlphaClamping = 0;
if( a.a == a.b )
{ // just d remains
if( a.d == 0 ) {}
else
{
t_dstrgb = a.d == 1 ? GL_ONE : GL_ZERO;
t_srcrgb = GL_ZERO;
t_rgbeq = GL_FUNC_ADD; //a) (001) (111) (221) b) (002) (112) (222)
}
goto EndSetAlpha;
}
else if( a.d == 2 )
{ // zero
if( a.a == 2 )
{
// zero all color
t_srcrgb = GL_ZERO;
t_dstrgb = GL_ZERO;
goto EndSetAlpha; // (202) (212)
}
else if( a.b == 2 )
{
//b2XAlphaTest = 1; // a) (022) // b) (122)
SET_ALPHA_COLOR_FACTOR(1);
if( bDestAlphaColor == 2 )
{
t_rgbeq = GL_FUNC_ADD;
t_srcrgb = a.a == 0 ? GL_ONE : GL_ZERO;
t_dstrgb = a.a == 0 ? GL_ZERO : GL_ONE;
}
else
{
tAlphaClamping = 2;
t_rgbeq = GL_FUNC_ADD;
t_srcrgb = a.a == 0 ? blendalpha[usec] : GL_ZERO;
t_dstrgb = a.a == 0 ? GL_ZERO : blendalpha[usec];
}
goto EndSetAlpha;
}
// nothing is zero, so must do some real blending //b2XAlphaTest = 1; //a) (012) //b) (102)
tAlphaClamping = 3;
SET_ALPHA_COLOR_FACTOR(1);
t_rgbeq = a.a == 0 ? GL_FUNC_SUBTRACT : GL_FUNC_REVERSE_SUBTRACT;
t_srcrgb = bDestAlphaColor == 2 ? GL_ONE : blendalpha[usec];
t_dstrgb = bDestAlphaColor == 2 ? GL_ONE : blendalpha[usec];
}
else if( a.a == 2 )
{ // zero
//b2XAlphaTest = 1;
tAlphaClamping = 1; // min testing
SET_ALPHA_COLOR_FACTOR(1);
if( a.b == a.d )
{
// can get away with 1-A
// a.a == a.d == 2!! (200) (211)
t_rgbeq = GL_FUNC_ADD;
t_srcrgb = (a.b == 0 && bDestAlphaColor != 2) ? blendinvalpha[usec] : GL_ZERO;
t_dstrgb = (a.b == 0 || bDestAlphaColor == 2) ? GL_ZERO : blendinvalpha[usec];
}
else
{
// a) (201) b)(210)
t_rgbeq = a.b==0 ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_SUBTRACT;
t_srcrgb = (a.b == 0 && bDestAlphaColor != 2) ? blendalpha[usec] : GL_ONE;
t_dstrgb = (a.b == 0 || bDestAlphaColor == 2 ) ? GL_ONE : blendalpha[usec];
}
}
else if( a.b == 2 )
{
tAlphaClamping = 2; // max testing
SET_ALPHA_COLOR_FACTOR(a.a!=a.d);
if( a.a == a.d )
{
// can get away with 1+A, but need to set alpha to negative
// a)(020)
// b)(121)
t_rgbeq = GL_FUNC_ADD;
if( bDestAlphaColor == 2 )
{
t_srcrgb = (a.a == 0) ? GL_ONE_MINUS_SRC_ALPHA : GL_ZERO;
t_dstrgb = (a.a == 0) ? GL_ZERO : GL_ONE_MINUS_SRC_ALPHA;
}
else
{
t_srcrgb = a.a == 0 ? blendinvalpha[usec] : GL_ZERO;
t_dstrgb = a.a == 0 ? GL_ZERO : blendinvalpha[usec];
}
}
else
{
//a)(021) //b)(120) //b2XAlphaTest = 1;
t_rgbeq = GL_FUNC_ADD;
t_srcrgb = (a.a == 0 && bDestAlphaColor != 2) ? blendalpha[usec] : GL_ONE;
t_dstrgb = (a.a == 0 || bDestAlphaColor == 2) ? GL_ONE : blendalpha[usec];
}
}
else
{
// all 3 components are valid!
tAlphaClamping = 3; // all testing
SET_ALPHA_COLOR_FACTOR(a.a!=a.d);
if( a.a == a.d )
{
// can get away with 1+A, but need to set alpha to negative // a) 010, // b) 101
t_rgbeq = GL_FUNC_ADD;
if( bDestAlphaColor == 2 )
{
// all ones
t_srcrgb = a.a == 0 ? GL_ONE_MINUS_SRC_ALPHA : GL_SRC_ALPHA;
t_dstrgb = a.a == 0 ? GL_SRC_ALPHA : GL_ONE_MINUS_SRC_ALPHA;
}
else
{
t_srcrgb = a.a == 0 ? blendinvalpha[usec] : blendalpha[usec];
t_dstrgb = a.a == 0 ? blendalpha[usec] : blendinvalpha[usec];
}
}
else
{
t_rgbeq = GL_FUNC_ADD; // a) 011 // b) 100 //
if( bDestAlphaColor == 2 )
{
// all ones
t_srcrgb = a.a != 0 ? GL_ONE_MINUS_SRC_ALPHA : GL_SRC_ALPHA;
t_dstrgb = a.a != 0 ? GL_SRC_ALPHA : GL_ONE_MINUS_SRC_ALPHA;
}
else
{
//b2XAlphaTest = 1;
t_srcrgb = a.a != 0 ? blendinvalpha[usec] : blendalpha[usec];
t_dstrgb = a.a != 0 ? blendalpha[usec] : blendinvalpha[usec];
}
}
}
EndSetAlpha:
if ( alphaenable && (t_rgbeq != s_rgbeq || s_srcrgb != t_srcrgb || t_dstrgb != s_dstrgb || tAlphaClamping != bAlphaClamping)) {
if (CheckArray[code][(bDestAlphaColor==2)] != -1) {
ZZLog::Debug_Log( "A code %d, 0x%x, 0x%x, 0x%x, 0x%x %d", code, alpha, one_minus_alpha, one, zero, bDestAlphaColor );
ZZLog::Debug_Log( " Difference %d %d %d %d | 0x%x 0x%x | 0x%x 0x%x | 0x%x 0x%x | %d %d",
code, a.a, a.b, a.d,
t_rgbeq, s_rgbeq, t_srcrgb, s_srcrgb, t_dstrgb, s_dstrgb, tAlphaClamping, bAlphaClamping);
CheckArray[code][(bDestAlphaColor==2)] = -1;
}
}
else
if (CheckArray[code][(bDestAlphaColor==2)] == 0){
ZZLog::Debug_Log( "Add good code %d %d, psm %d destA %d", code, a.c, vb[icurctx].prndr->psm, bDestAlphaColor);
CheckArray[code][(bDestAlphaColor==2)] = 1;
}*/
if (alphaenable)
{
zgsBlendFuncSeparateEXT(s_srcrgb, s_dstrgb, s_srcalpha, s_dstalpha);
zgsBlendEquationSeparateEXT(s_rgbeq, s_alphaeq);
glEnable(GL_BLEND); // always set
}
else
{
glDisable(GL_BLEND);
}
INC_ALPHAVARS();
}
void SetWriteDepth()
{
FUNCLOG
if (conf.mrtdepth)
{
s_bWriteDepth = true;
s_nWriteDepthCount = 4;
}
}
bool IsWriteDepth()
{
FUNCLOG
return s_bWriteDepth;
}
bool IsWriteDestAlphaTest()
{
FUNCLOG
return s_bDestAlphaTest;
}
void SetDestAlphaTest()
{
FUNCLOG
s_bDestAlphaTest = true;
s_nWriteDestAlphaTest = 4;
}
// Raises the s_bTexFlush flag and, when s_bForceTexFlush is not set, copies
// each entry of s_ptexNextSet into the matching entry of s_ptexCurSet.
// NOTE(review): the two slots presumably correspond to the two GS drawing
// contexts (the disabled code below indexes vb[0] / vb[1]) -- confirm.
void SetTexFlush()
{
	FUNCLOG

	s_bTexFlush = true;

	// Disabled CLUT re-write, kept for reference:
	//	if( PSMT_ISCLUT(vb[0].tex0.psm) )
	//		texClutWrite(0);
	//	if( PSMT_ISCLUT(vb[1].tex0.psm) )
	//		texClutWrite(1);

	// With a forced flush requested, the current sets are left as they are.
	if (s_bForceTexFlush) return;

	for (int slot = 0; slot < 2; ++slot)
	{
		// Only store when the entries actually differ.
		if (s_ptexCurSet[slot] != s_ptexNextSet[slot])
		{
			s_ptexCurSet[slot] = s_ptexNextSet[slot];
		}
	}
}