mirror of https://github.com/PCSX2/pcsx2.git
Merge pull request #749 from PCSX2/gsdx-micro-optimization
Gsdx micro optimization
This commit is contained in:
commit
6046d6c417
|
@ -15,6 +15,8 @@ endif()
|
||||||
set(CommonFlags
|
set(CommonFlags
|
||||||
# GCC-4.6 crash pcsx2 during the binding of plugins at startup...
|
# GCC-4.6 crash pcsx2 during the binding of plugins at startup...
|
||||||
# Disable this optimization for the moment
|
# Disable this optimization for the moment
|
||||||
|
# GCC-4.9 update:
|
||||||
|
# Crash when you start a game. Likely a stack corruption/alignment
|
||||||
-fno-omit-frame-pointer
|
-fno-omit-frame-pointer
|
||||||
# END GCC-4.6
|
# END GCC-4.6
|
||||||
-fno-strict-aliasing
|
-fno-strict-aliasing
|
||||||
|
|
|
@ -48,10 +48,6 @@ endif()
|
||||||
#Clang doesn't support a few common flags that GCC does.
|
#Clang doesn't support a few common flags that GCC does.
|
||||||
if(NOT USE_CLANG)
|
if(NOT USE_CLANG)
|
||||||
set(GSdxFinalFlags ${GSdxFinalFlags} -fabi-version=6)
|
set(GSdxFinalFlags ${GSdxFinalFlags} -fabi-version=6)
|
||||||
if (_M_X86_32 AND NOT USE_ASAN)
|
|
||||||
# Someone need to seriously test the build of GSdx without this option
|
|
||||||
set(GSdxFinalFlags ${GSdxFinalFlags} -mpreferred-stack-boundary=2)
|
|
||||||
endif()
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(GSdxSources
|
set(GSdxSources
|
||||||
|
|
|
@ -28,13 +28,12 @@ namespace GLState {
|
||||||
GSVector4i scissor;
|
GSVector4i scissor;
|
||||||
|
|
||||||
bool blend;
|
bool blend;
|
||||||
GLenum eq_RGB;
|
uint16 eq_RGB;
|
||||||
GLenum f_sRGB;
|
uint16 f_sRGB;
|
||||||
GLenum f_dRGB;
|
uint16 f_dRGB;
|
||||||
|
uint8 bf;
|
||||||
uint32 wrgba;
|
uint32 wrgba;
|
||||||
|
|
||||||
float bf;
|
|
||||||
|
|
||||||
bool depth;
|
bool depth;
|
||||||
GLenum depth_func;
|
GLenum depth_func;
|
||||||
bool depth_mask;
|
bool depth_mask;
|
||||||
|
@ -58,14 +57,6 @@ namespace GLState {
|
||||||
GLuint vs;
|
GLuint vs;
|
||||||
GLuint program;
|
GLuint program;
|
||||||
bool dirty_prog;
|
bool dirty_prog;
|
||||||
#if 0
|
|
||||||
struct {
|
|
||||||
GSVertexBufferStateOGL* vb;
|
|
||||||
GSDepthStencilOGL* dss;
|
|
||||||
GSBlendStateOGL* bs;
|
|
||||||
float bf; // blend factor
|
|
||||||
} m_state;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void Clear() {
|
void Clear() {
|
||||||
fbo = 0;
|
fbo = 0;
|
||||||
|
@ -76,8 +67,8 @@ namespace GLState {
|
||||||
eq_RGB = 0;
|
eq_RGB = 0;
|
||||||
f_sRGB = 0;
|
f_sRGB = 0;
|
||||||
f_dRGB = 0;
|
f_dRGB = 0;
|
||||||
|
bf = 0;
|
||||||
wrgba = 0xF;
|
wrgba = 0xF;
|
||||||
bf = 0.0;
|
|
||||||
|
|
||||||
depth = false;
|
depth = false;
|
||||||
depth_func = 0;
|
depth_func = 0;
|
||||||
|
|
|
@ -30,11 +30,11 @@ namespace GLState {
|
||||||
extern GSVector4i scissor;
|
extern GSVector4i scissor;
|
||||||
|
|
||||||
extern bool blend;
|
extern bool blend;
|
||||||
extern GLenum eq_RGB;
|
extern uint16 eq_RGB;
|
||||||
extern GLenum f_sRGB;
|
extern uint16 f_sRGB;
|
||||||
extern GLenum f_dRGB;
|
extern uint16 f_dRGB;
|
||||||
|
extern uint8 bf;
|
||||||
extern uint32 wrgba;
|
extern uint32 wrgba;
|
||||||
extern float bf;
|
|
||||||
|
|
||||||
extern bool depth;
|
extern bool depth;
|
||||||
extern GLenum depth_func;
|
extern GLenum depth_func;
|
||||||
|
|
|
@ -353,7 +353,6 @@ bool GSDeviceOGL::Create(GSWnd* wnd)
|
||||||
ASSERT(sizeof(PSSamplerSelector) == 4);
|
ASSERT(sizeof(PSSamplerSelector) == 4);
|
||||||
ASSERT(sizeof(OMDepthStencilSelector) == 4);
|
ASSERT(sizeof(OMDepthStencilSelector) == 4);
|
||||||
ASSERT(sizeof(OMColorMaskSelector) == 4);
|
ASSERT(sizeof(OMColorMaskSelector) == 4);
|
||||||
ASSERT(sizeof(OMBlendSelector) == 4);
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -673,6 +672,7 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel)
|
||||||
+ format("#define PS_WRITE_RG %d\n", sel.write_rg)
|
+ format("#define PS_WRITE_RG %d\n", sel.write_rg)
|
||||||
+ format("#define PS_FBMASK %d\n", sel.fbmask)
|
+ format("#define PS_FBMASK %d\n", sel.fbmask)
|
||||||
+ format("#define PS_HDR %d\n", sel.hdr)
|
+ format("#define PS_HDR %d\n", sel.hdr)
|
||||||
|
+ format("#define PS_PABE %d\n", sel.pabe);
|
||||||
;
|
;
|
||||||
|
|
||||||
return m_shader->Compile("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, tfx_fs_all_glsl, macro);
|
return m_shader->Compile("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, tfx_fs_all_glsl, macro);
|
||||||
|
@ -1342,7 +1342,7 @@ void GSDeviceOGL::OMSetColorMaskState(OMColorMaskSelector sel)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDeviceOGL::OMSetBlendState(int blend_index, float blend_factor, bool is_blend_constant)
|
void GSDeviceOGL::OMSetBlendState(uint8 blend_index, uint8 blend_factor, bool is_blend_constant)
|
||||||
{
|
{
|
||||||
if (blend_index) {
|
if (blend_index) {
|
||||||
if (!GLState::blend) {
|
if (!GLState::blend) {
|
||||||
|
@ -1352,28 +1352,27 @@ void GSDeviceOGL::OMSetBlendState(int blend_index, float blend_factor, bool is_b
|
||||||
|
|
||||||
if (is_blend_constant && GLState::bf != blend_factor) {
|
if (is_blend_constant && GLState::bf != blend_factor) {
|
||||||
GLState::bf = blend_factor;
|
GLState::bf = blend_factor;
|
||||||
gl_BlendColor(blend_factor, blend_factor, blend_factor, 0);
|
float bf = (float)blend_factor / 128.0f;
|
||||||
|
gl_BlendColor(bf, bf, bf, bf);
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME test to use uint16 (cache friendly)
|
const OGLBlend& b = m_blendMapOGL[blend_index];
|
||||||
const GLenum& op = m_blendMapD3D9[blend_index].op;
|
|
||||||
if (GLState::eq_RGB != op) {
|
if (GLState::eq_RGB != b.op) {
|
||||||
GLState::eq_RGB = op;
|
GLState::eq_RGB = b.op;
|
||||||
if (gl_BlendEquationSeparateiARB)
|
if (gl_BlendEquationSeparateiARB)
|
||||||
gl_BlendEquationSeparateiARB(0, op, GL_FUNC_ADD);
|
gl_BlendEquationSeparateiARB(0, b.op, GL_FUNC_ADD);
|
||||||
else
|
else
|
||||||
gl_BlendEquationSeparate(op, GL_FUNC_ADD);
|
gl_BlendEquationSeparate(b.op, GL_FUNC_ADD);
|
||||||
}
|
}
|
||||||
|
|
||||||
const GLenum& src = m_blendMapD3D9[blend_index].src;
|
if (GLState::f_sRGB != b.src || GLState::f_dRGB != b.dst) {
|
||||||
const GLenum& dst = m_blendMapD3D9[blend_index].dst;
|
GLState::f_sRGB = b.src;
|
||||||
if (GLState::f_sRGB != src || GLState::f_dRGB != dst) {
|
GLState::f_dRGB = b.dst;
|
||||||
GLState::f_sRGB = src;
|
|
||||||
GLState::f_dRGB = dst;
|
|
||||||
if (gl_BlendFuncSeparateiARB)
|
if (gl_BlendFuncSeparateiARB)
|
||||||
gl_BlendFuncSeparateiARB(0, src, dst, GL_ONE, GL_ZERO);
|
gl_BlendFuncSeparateiARB(0, b.src, b.dst, GL_ONE, GL_ZERO);
|
||||||
else
|
else
|
||||||
gl_BlendFuncSeparate(src, dst, GL_ONE, GL_ZERO);
|
gl_BlendFuncSeparate(b.src, b.dst, GL_ONE, GL_ZERO);
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
@ -1541,7 +1540,7 @@ void GSDeviceOGL::DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id,
|
||||||
const int GSDeviceOGL::m_NO_BLEND = 0;
|
const int GSDeviceOGL::m_NO_BLEND = 0;
|
||||||
const int GSDeviceOGL::m_MERGE_BLEND = 3*3*3*3;
|
const int GSDeviceOGL::m_MERGE_BLEND = 3*3*3*3;
|
||||||
|
|
||||||
const GSDeviceOGL::D3D9Blend GSDeviceOGL::m_blendMapD3D9[3*3*3*3 + 1] =
|
const GSDeviceOGL::OGLBlend GSDeviceOGL::m_blendMapOGL[3*3*3*3 + 1] =
|
||||||
{
|
{
|
||||||
{ BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 0000: (Cs - Cs)*As + Cs ==> Cs
|
{ BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 0000: (Cs - Cs)*As + Cs ==> Cs
|
||||||
{ 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 0001: (Cs - Cs)*As + Cd ==> Cd
|
{ 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 0001: (Cs - Cs)*As + Cd ==> Cd
|
||||||
|
|
|
@ -285,6 +285,7 @@ class GSDeviceOGL : public GSDevice
|
||||||
uint32 blend_c:2;
|
uint32 blend_c:2;
|
||||||
uint32 blend_d:2;
|
uint32 blend_d:2;
|
||||||
uint32 clr1:1; // useful?
|
uint32 clr1:1; // useful?
|
||||||
|
uint32 pabe:1;
|
||||||
uint32 hdr:1;
|
uint32 hdr:1;
|
||||||
uint32 colclip:1;
|
uint32 colclip:1;
|
||||||
|
|
||||||
|
@ -292,7 +293,7 @@ class GSDeviceOGL : public GSDevice
|
||||||
uint32 tcoffsethack:1;
|
uint32 tcoffsethack:1;
|
||||||
//uint32 point_sampler:1; Not tested, so keep the bit for blend
|
//uint32 point_sampler:1; Not tested, so keep the bit for blend
|
||||||
|
|
||||||
uint32 _free2:20;
|
uint32 _free2:19;
|
||||||
};
|
};
|
||||||
|
|
||||||
uint64 key;
|
uint64 key;
|
||||||
|
@ -378,44 +379,8 @@ class GSDeviceOGL : public GSDevice
|
||||||
OMColorMaskSelector(uint32 c) { wrgba = c; }
|
OMColorMaskSelector(uint32 c) { wrgba = c; }
|
||||||
};
|
};
|
||||||
|
|
||||||
struct OMBlendSelector
|
struct OGLBlend {uint16 bogus, op, src, dst;};
|
||||||
{
|
static const OGLBlend m_blendMapOGL[3*3*3*3 + 1];
|
||||||
union
|
|
||||||
{
|
|
||||||
struct
|
|
||||||
{
|
|
||||||
uint32 abe:1;
|
|
||||||
uint32 a:2;
|
|
||||||
uint32 b:2;
|
|
||||||
uint32 c:2;
|
|
||||||
uint32 d:2;
|
|
||||||
|
|
||||||
uint32 _free:23;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct
|
|
||||||
{
|
|
||||||
uint32 _abe:1;
|
|
||||||
uint32 abcd:8;
|
|
||||||
|
|
||||||
uint32 _free2:23;
|
|
||||||
};
|
|
||||||
|
|
||||||
uint32 key;
|
|
||||||
};
|
|
||||||
|
|
||||||
operator uint32() {return key;}
|
|
||||||
|
|
||||||
OMBlendSelector() : key(0) {}
|
|
||||||
|
|
||||||
bool IsCLR1() const
|
|
||||||
{
|
|
||||||
return (key & 0x19f) == 0x93; // abe == 1 && a == 1 && b == 2 && d == 1
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
struct D3D9Blend {int bogus, op, src, dst;};
|
|
||||||
static const D3D9Blend m_blendMapD3D9[3*3*3*3 + 1];
|
|
||||||
static const int m_NO_BLEND;
|
static const int m_NO_BLEND;
|
||||||
static const int m_MERGE_BLEND;
|
static const int m_MERGE_BLEND;
|
||||||
|
|
||||||
|
@ -562,7 +527,7 @@ class GSDeviceOGL : public GSDevice
|
||||||
void PSSetSamplerState(GLuint ss);
|
void PSSetSamplerState(GLuint ss);
|
||||||
|
|
||||||
void OMSetDepthStencilState(GSDepthStencilOGL* dss);
|
void OMSetDepthStencilState(GSDepthStencilOGL* dss);
|
||||||
void OMSetBlendState(int blend_index = 0, float blend_factor = 0.0f, bool is_blend_constant = false);
|
void OMSetBlendState(uint8 blend_index = 0, uint8 blend_factor = 0, bool is_blend_constant = false);
|
||||||
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL);
|
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL);
|
||||||
void OMSetWriteBuffer(GLenum buffer = GL_COLOR_ATTACHMENT0);
|
void OMSetWriteBuffer(GLenum buffer = GL_COLOR_ATTACHMENT0);
|
||||||
void OMSetColorMaskState(OMColorMaskSelector sel = OMColorMaskSelector());
|
void OMSetColorMaskState(OMColorMaskSelector sel = OMColorMaskSelector());
|
||||||
|
|
|
@ -152,7 +152,7 @@ void GSRendererOGL::SetupIA()
|
||||||
dev->IASetPrimitiveTopology(t);
|
dev->IASetPrimitiveTopology(t);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GSRendererOGL::EmulateTextureShuffleAndFbmask(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL::OMColorMaskSelector& om_csel, GSDeviceOGL::PSConstantBuffer& ps_cb)
|
bool GSRendererOGL::EmulateTextureShuffleAndFbmask(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL::OMColorMaskSelector& om_csel)
|
||||||
{
|
{
|
||||||
bool require_barrier = false;
|
bool require_barrier = false;
|
||||||
|
|
||||||
|
@ -301,93 +301,77 @@ bool GSRendererOGL::EmulateTextureShuffleAndFbmask(GSDeviceOGL::PSSelector& ps_s
|
||||||
return require_barrier;
|
return require_barrier;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL::PSConstantBuffer& ps_cb, bool DATE_GL42)
|
bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, bool DATE_GL42)
|
||||||
{
|
{
|
||||||
|
GSDeviceOGL* dev = (GSDeviceOGL*)m_dev;
|
||||||
const GIFRegALPHA& ALPHA = m_context->ALPHA;
|
const GIFRegALPHA& ALPHA = m_context->ALPHA;
|
||||||
bool require_barrier = false;
|
bool require_barrier = false;
|
||||||
GSDeviceOGL* dev = (GSDeviceOGL*)m_dev;
|
bool sw_blending = false;
|
||||||
float afix = (float)m_context->ALPHA.FIX / 0x80;
|
|
||||||
GSDeviceOGL::OMBlendSelector om_bsel;
|
|
||||||
|
|
||||||
om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS;
|
// No blending so early exit
|
||||||
|
if (!(PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS)) {
|
||||||
om_bsel.a = ALPHA.A;
|
#ifdef ENABLE_OGL_DEBUG
|
||||||
om_bsel.b = ALPHA.B;
|
if (m_env.PABE.PABE) {
|
||||||
om_bsel.c = ALPHA.C;
|
GL_INS("!!! ENV PABE without ABE !!!");
|
||||||
om_bsel.d = ALPHA.D;
|
}
|
||||||
|
#endif
|
||||||
|
dev->OMSetBlendState();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
if (m_env.PABE.PABE)
|
if (m_env.PABE.PABE)
|
||||||
{
|
{
|
||||||
GL_INS("!!! ENV PABE not supported !!!");
|
GL_INS("!!! ENV PABE not supported !!!");
|
||||||
// FIXME it could be supported with SW blending!
|
if (m_sw_blending >= ACC_BLEND_CCLIP_DALPHA) {
|
||||||
if (om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1)
|
ps_sel.pabe = 1;
|
||||||
{
|
require_barrier |= (ALPHA.C == 1);
|
||||||
// this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader
|
sw_blending = true;
|
||||||
// cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result
|
|
||||||
om_bsel.abe = 0;
|
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
//Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though.
|
//Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though.
|
||||||
//ASSERT(0);
|
//ASSERT(0);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// No blending so early exit
|
|
||||||
if (!om_bsel.abe) {
|
|
||||||
dev->OMSetBlendState();
|
|
||||||
return require_barrier;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Compute the blending equation to detect special case
|
// Compute the blending equation to detect special case
|
||||||
int blend_sel = ((om_bsel.a * 3 + om_bsel.b) * 3 + om_bsel.c) * 3 + om_bsel.d;
|
uint8 blend_index = ((ALPHA.A * 3 + ALPHA.B) * 3 + ALPHA.C) * 3 + ALPHA.D;
|
||||||
int blend_flag = GSDeviceOGL::m_blendMapD3D9[blend_sel].bogus;
|
int blend_flag = GSDeviceOGL::m_blendMapOGL[blend_index].bogus;
|
||||||
|
|
||||||
// SW Blend is (nearly) free. Let's use it.
|
// SW Blend is (nearly) free. Let's use it.
|
||||||
bool free_blend = (blend_flag & BLEND_NO_BAR) || (m_prim_overlap == PRIM_OVERLAP_NO);
|
bool impossible_or_free_blend = (blend_flag & (BLEND_NO_BAR|BLEND_A_MAX|BLEND_ACCU))
|
||||||
// We really need SW blending for this one, barely used
|
|| (m_prim_overlap == PRIM_OVERLAP_NO);
|
||||||
bool impossible_blend = (blend_flag & BLEND_A_MAX);
|
|
||||||
// Do the multiplication in shader for blending accumulation: Cs*As + Cd or Cs*Af + Cd
|
// Do the multiplication in shader for blending accumulation: Cs*As + Cd or Cs*Af + Cd
|
||||||
bool accumulation_blend = (blend_flag & BLEND_ACCU);
|
bool accumulation_blend = (blend_flag & BLEND_ACCU);
|
||||||
|
|
||||||
bool sw_blending_base = m_sw_blending && (free_blend || impossible_blend);
|
// Warning no break on purpose
|
||||||
|
switch (m_sw_blending) {
|
||||||
|
case ACC_BLEND_ULTRA: sw_blending |= true;
|
||||||
|
case ACC_BLEND_FULL: if (!m_vt.m_alpha.valid && (ALPHA.C == 0)) GetAlphaMinMax();
|
||||||
|
sw_blending |= (ALPHA.A != ALPHA.B) &&
|
||||||
|
((ALPHA.C == 0 && m_vt.m_alpha.max > 128u) || (ALPHA.C == 2 && ALPHA.FIX > 128u));
|
||||||
|
case ACC_BLEND_CCLIP_DALPHA: sw_blending |= (ALPHA.C == 1) || (m_env.COLCLAMP.CLAMP == 0);
|
||||||
|
case ACC_BLEND_SPRITE: sw_blending |= m_vt.m_primclass == GS_SPRITE_CLASS;
|
||||||
|
case ACC_BLEND_FREE: sw_blending |= ps_sel.fbmask || impossible_or_free_blend;
|
||||||
|
default: sw_blending |= accumulation_blend;
|
||||||
|
}
|
||||||
|
// SW Blending
|
||||||
|
// GL42 interact very badly with sw blending. GL42 uses the primitiveID to find the primitive
|
||||||
|
// that write the bad alpha value. Sw blending will force the draw to run primitive by primitive
|
||||||
|
// (therefore primitiveID will be constant to 1)
|
||||||
|
sw_blending &= !DATE_GL42;
|
||||||
|
|
||||||
// Color clip
|
// Color clip
|
||||||
if (m_env.COLCLAMP.CLAMP == 0) {
|
if (m_env.COLCLAMP.CLAMP == 0) {
|
||||||
if (accumulation_blend) {
|
if (accumulation_blend) {
|
||||||
ps_sel.hdr = 1;
|
ps_sel.hdr = 1;
|
||||||
GL_INS("COLCLIP Fast HDR mode ENABLED");
|
GL_INS("COLCLIP Fast HDR mode ENABLED");
|
||||||
} else if (m_sw_blending >= ACC_BLEND_CCLIP_DALPHA || sw_blending_base) {
|
} else if (sw_blending) {
|
||||||
ps_sel.colclip = 1;
|
ps_sel.colclip = 1;
|
||||||
sw_blending_base = true;
|
|
||||||
GL_INS("COLCLIP SW ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D);
|
GL_INS("COLCLIP SW ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D);
|
||||||
} else {
|
} else {
|
||||||
GL_INS("Sorry colclip isn't supported");
|
GL_INS("Sorry colclip isn't supported");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Note: Option is duplicated, one impact the blend unit / the other the shader.
|
|
||||||
sw_blending_base |= accumulation_blend;
|
|
||||||
|
|
||||||
// Warning no break on purpose
|
|
||||||
bool sw_blending_adv = false;
|
|
||||||
switch (m_sw_blending) {
|
|
||||||
case ACC_BLEND_ULTRA: sw_blending_adv |= true;
|
|
||||||
case ACC_BLEND_FULL: sw_blending_adv |= !( (ALPHA.A == ALPHA.B) || (ALPHA.C == 2 && afix <= 1.002f) );
|
|
||||||
case ACC_BLEND_CCLIP_DALPHA: sw_blending_adv |= (ALPHA.C == 1);
|
|
||||||
case ACC_BLEND_SPRITE: sw_blending_adv |= m_vt.m_primclass == GS_SPRITE_CLASS;
|
|
||||||
default: break;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool sw_blending = sw_blending_base // Free case or Impossible blend
|
|
||||||
|| sw_blending_adv // complex blending case (for special effect)
|
|
||||||
|| ps_sel.fbmask; // accurate fbmask
|
|
||||||
|
|
||||||
|
|
||||||
// SW Blending
|
|
||||||
// GL42 interact very badly with sw blending. GL42 uses the primitiveID to find the primitive
|
|
||||||
// that write the bad alpha value. Sw blending will force the draw to run primitive by primitive
|
|
||||||
// (therefore primitiveID will be constant to 1)
|
|
||||||
sw_blending &= !DATE_GL42;
|
|
||||||
// Seriously don't expect me to support this kind of crazyness.
|
// Seriously don't expect me to support this kind of crazyness.
|
||||||
// No mix of COLCLIP + accumulation_blend + DATE GL42
|
// No mix of COLCLIP + accumulation_blend + DATE GL42
|
||||||
// Neither fbmask and GL42
|
// Neither fbmask and GL42
|
||||||
|
@ -397,18 +381,17 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL
|
||||||
// For stat to optimize accurate option
|
// For stat to optimize accurate option
|
||||||
#if 0
|
#if 0
|
||||||
GL_INS("BLEND_INFO: %d/%d/%d/%d. Clamp:%d. Prim:%d number %d (sw %d)",
|
GL_INS("BLEND_INFO: %d/%d/%d/%d. Clamp:%d. Prim:%d number %d (sw %d)",
|
||||||
om_bsel.a, om_bsel.b, om_bsel.c, om_bsel.d, m_env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, sw_blending);
|
ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D, m_env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, sw_blending);
|
||||||
#endif
|
#endif
|
||||||
if (sw_blending) {
|
if (sw_blending) {
|
||||||
ps_sel.blend_a = om_bsel.a;
|
ps_sel.blend_a = ALPHA.A;
|
||||||
ps_sel.blend_b = om_bsel.b;
|
ps_sel.blend_b = ALPHA.B;
|
||||||
ps_sel.blend_c = om_bsel.c;
|
ps_sel.blend_c = ALPHA.C;
|
||||||
ps_sel.blend_d = om_bsel.d;
|
ps_sel.blend_d = ALPHA.D;
|
||||||
|
|
||||||
if (accumulation_blend) {
|
if (accumulation_blend) {
|
||||||
// Keep HW blending to do the addition
|
// Keep HW blending to do the addition
|
||||||
dev->OMSetBlendState(blend_sel);
|
dev->OMSetBlendState(blend_index);
|
||||||
om_bsel.abe = 1;
|
|
||||||
// Remove the addition from the SW blending
|
// Remove the addition from the SW blending
|
||||||
ps_sel.blend_d = 2;
|
ps_sel.blend_d = 2;
|
||||||
} else {
|
} else {
|
||||||
|
@ -418,19 +401,19 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL
|
||||||
|
|
||||||
// Require the fix alpha vlaue
|
// Require the fix alpha vlaue
|
||||||
if (ALPHA.C == 2) {
|
if (ALPHA.C == 2) {
|
||||||
ps_cb.AlphaCoeff.a = afix;
|
ps_cb.AlphaCoeff.a = (float)ALPHA.FIX / 128.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
// No need to flush for every primitive
|
// No need to flush for every primitive
|
||||||
require_barrier |= !(blend_flag & BLEND_NO_BAR) && !accumulation_blend;
|
require_barrier |= !(blend_flag & BLEND_NO_BAR) && !accumulation_blend;
|
||||||
} else {
|
} else {
|
||||||
ps_sel.clr1 = om_bsel.IsCLR1();
|
ps_sel.clr1 = (blend_flag & BLEND_C_CLR);
|
||||||
if (ps_sel.dfmt == 1 && ALPHA.C == 1) {
|
if (ps_sel.dfmt == 1 && ALPHA.C == 1) {
|
||||||
// 24 bits doesn't have an alpha channel so use 1.0f fix factor as equivalent
|
// 24 bits doesn't have an alpha channel so use 1.0f fix factor as equivalent
|
||||||
int hacked_blend_sel = blend_sel + 3; // +3 <=> +1 on C
|
int hacked_blend_index = blend_index + 3; // +3 <=> +1 on C
|
||||||
dev->OMSetBlendState(hacked_blend_sel, 1.0f, true);
|
dev->OMSetBlendState(hacked_blend_index, 128, true);
|
||||||
} else {
|
} else {
|
||||||
dev->OMSetBlendState(blend_sel, afix, (ALPHA.C == 2));
|
dev->OMSetBlendState(blend_index, ALPHA.FIX, (ALPHA.C == 2));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -577,16 +560,10 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
GSDeviceOGL* dev = (GSDeviceOGL*)m_dev;
|
GSDeviceOGL* dev = (GSDeviceOGL*)m_dev;
|
||||||
dev->s_n = s_n;
|
dev->s_n = s_n;
|
||||||
|
|
||||||
// FIXME: optimization, latch ps_cb & vs_cb in the object
|
|
||||||
// 1/ Avoid a reset every draw
|
|
||||||
// 2/ potentially less update
|
|
||||||
GSDeviceOGL::VSSelector vs_sel;
|
GSDeviceOGL::VSSelector vs_sel;
|
||||||
GSDeviceOGL::VSConstantBuffer vs_cb;
|
|
||||||
|
|
||||||
GSDeviceOGL::GSSelector gs_sel;
|
GSDeviceOGL::GSSelector gs_sel;
|
||||||
|
|
||||||
GSDeviceOGL::PSSelector ps_sel;
|
GSDeviceOGL::PSSelector ps_sel;
|
||||||
GSDeviceOGL::PSConstantBuffer ps_cb;
|
|
||||||
GSDeviceOGL::PSSamplerSelector ps_ssel;
|
GSDeviceOGL::PSSamplerSelector ps_ssel;
|
||||||
|
|
||||||
GSDeviceOGL::OMColorMaskSelector om_csel;
|
GSDeviceOGL::OMColorMaskSelector om_csel;
|
||||||
|
@ -604,7 +581,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
m_prim_overlap = PRIM_OVERLAP_UNKNOW;
|
m_prim_overlap = PRIM_OVERLAP_UNKNOW;
|
||||||
}
|
}
|
||||||
|
|
||||||
require_barrier |= EmulateTextureShuffleAndFbmask(ps_sel, om_csel, ps_cb);
|
require_barrier |= EmulateTextureShuffleAndFbmask(ps_sel, om_csel);
|
||||||
|
|
||||||
// DATE: selection of the algorithm. Must be done before blending because GL42 is not compatible with blending
|
// DATE: selection of the algorithm. Must be done before blending because GL42 is not compatible with blending
|
||||||
|
|
||||||
|
@ -632,7 +609,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
// Blend
|
// Blend
|
||||||
|
|
||||||
if (!IsOpaque() && rt) {
|
if (!IsOpaque() && rt) {
|
||||||
require_barrier |= EmulateBlending(ps_sel, ps_cb, DATE_GL42);
|
require_barrier |= EmulateBlending(ps_sel, DATE_GL42);
|
||||||
} else {
|
} else {
|
||||||
dev->OMSetBlendState(); // No blending please
|
dev->OMSetBlendState(); // No blending please
|
||||||
}
|
}
|
||||||
|
|
|
@ -48,19 +48,21 @@ class GSRendererOGL : public GSRendererHW
|
||||||
GSVector2 m_pixelcenter;
|
GSVector2 m_pixelcenter;
|
||||||
bool m_accurate_date;
|
bool m_accurate_date;
|
||||||
int m_sw_blending;
|
int m_sw_blending;
|
||||||
|
PRIM_OVERLAP m_prim_overlap;
|
||||||
|
|
||||||
unsigned int UserHacks_TCOffset;
|
unsigned int UserHacks_TCOffset;
|
||||||
float UserHacks_TCO_x, UserHacks_TCO_y;
|
float UserHacks_TCO_x, UserHacks_TCO_y;
|
||||||
|
|
||||||
PRIM_OVERLAP m_prim_overlap;
|
GSDeviceOGL::VSConstantBuffer vs_cb;
|
||||||
|
GSDeviceOGL::PSConstantBuffer ps_cb;
|
||||||
|
|
||||||
GSVector4i ComputeBoundingBox(const GSVector2& rtscale, const GSVector2i& rtsize);
|
GSVector4i ComputeBoundingBox(const GSVector2& rtscale, const GSVector2i& rtsize);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void EmulateGS();
|
void EmulateGS();
|
||||||
void SetupIA();
|
void SetupIA();
|
||||||
bool EmulateTextureShuffleAndFbmask(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL::OMColorMaskSelector& om_csel, GSDeviceOGL::PSConstantBuffer& ps_cb);
|
bool EmulateTextureShuffleAndFbmask(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL::OMColorMaskSelector& om_csel);
|
||||||
bool EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL::PSConstantBuffer& ps_cb, bool DATE_GL42);
|
bool EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, bool DATE_GL42);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSRendererOGL();
|
GSRendererOGL();
|
||||||
|
|
|
@ -442,7 +442,7 @@ void ps_blend(inout vec4 Color, float As)
|
||||||
// FIXME dithering
|
// FIXME dithering
|
||||||
|
|
||||||
// Correct the Color value based on the output format
|
// Correct the Color value based on the output format
|
||||||
#if PS_COLCLIP == 0
|
#if PS_COLCLIP == 0 && PS_HDR == 0
|
||||||
// Standard Clamp
|
// Standard Clamp
|
||||||
Color.rgb = clamp(Color.rgb, vec3(0.0f), vec3(255.0f));
|
Color.rgb = clamp(Color.rgb, vec3(0.0f), vec3(255.0f));
|
||||||
#endif
|
#endif
|
||||||
|
@ -457,7 +457,7 @@ void ps_blend(inout vec4 Color, float As)
|
||||||
// In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania
|
// In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania
|
||||||
|
|
||||||
Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xF8));
|
Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xF8));
|
||||||
#elif PS_COLCLIP == 1
|
#elif PS_COLCLIP == 1 && PS_HDR == 0
|
||||||
Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xFF));
|
Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xFF));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -1301,7 +1301,7 @@ static const char* tfx_fs_all_glsl =
|
||||||
" // FIXME dithering\n"
|
" // FIXME dithering\n"
|
||||||
"\n"
|
"\n"
|
||||||
" // Correct the Color value based on the output format\n"
|
" // Correct the Color value based on the output format\n"
|
||||||
"#if PS_COLCLIP == 0\n"
|
"#if PS_COLCLIP == 0 && PS_HDR == 0\n"
|
||||||
" // Standard Clamp\n"
|
" // Standard Clamp\n"
|
||||||
" Color.rgb = clamp(Color.rgb, vec3(0.0f), vec3(255.0f));\n"
|
" Color.rgb = clamp(Color.rgb, vec3(0.0f), vec3(255.0f));\n"
|
||||||
"#endif\n"
|
"#endif\n"
|
||||||
|
@ -1316,7 +1316,7 @@ static const char* tfx_fs_all_glsl =
|
||||||
" // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania\n"
|
" // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania\n"
|
||||||
"\n"
|
"\n"
|
||||||
" Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xF8));\n"
|
" Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xF8));\n"
|
||||||
"#elif PS_COLCLIP == 1\n"
|
"#elif PS_COLCLIP == 1 && PS_HDR == 0\n"
|
||||||
" Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xFF));\n"
|
" Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xFF));\n"
|
||||||
"#endif\n"
|
"#endif\n"
|
||||||
"\n"
|
"\n"
|
||||||
|
|
Loading…
Reference in New Issue