gsdx-ogl: linux only

* Implement some missing shaders for DATE; invert the coordinates as done for the stretch rectangle
* Use the glCopyImageSubDataNV NVIDIA extension to copy images (on AMD you need the latest drivers)


git-svn-id: http://pcsx2.googlecode.com/svn/branches/gsdx-ogl@5086 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gregory.hainaut 2012-01-31 17:08:05 +00:00
parent e0004b5027
commit 6680d285ea
9 changed files with 68 additions and 714 deletions

View File

@ -19,7 +19,7 @@ set(CommonFlags
-std=c++0x
-fno-strict-aliasing
-DOGL_DEBUG # FIXME remove me when code is ready
-DAMD_DRIVER_WORKAROUND
-DAMD_DRIVER_WORKAROUND
)
set(OptimizationFlags

View File

@ -1360,8 +1360,8 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
//while(IsWindowVisible(hWnd))
//FIXME map?
bool finished = false;
while(!finished)
int finished = 2;
while(finished > 0)
{
unsigned long start = timeGetTime();
unsigned long frame_number = 0;
@ -1410,7 +1410,7 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
fprintf(stderr, "A means of %fms by frame\n", (float)(end - start)/(float)frame_number);
sleep(1);
finished = true;
finished--;
}

View File

@ -26,7 +26,7 @@
//#define ONLY_LINES
// It seems dual blending does not work on AMD !!!
#define DISABLE_DUAL_BLEND
//#define DISABLE_DUAL_BLEND
static uint32 g_draw_count = 0;
static uint32 g_frame_count = 1;
@ -49,6 +49,10 @@ GSDeviceOGL::GSDeviceOGL()
memset(&m_convert, 0, sizeof(m_convert));
memset(&m_date, 0, sizeof(m_date));
memset(&m_state, 0, sizeof(m_state));
// Reset the debug file
FILE* f = fopen("Debug.txt","w");
fclose(f);
}
GSDeviceOGL::~GSDeviceOGL()
@ -522,7 +526,11 @@ void GSDeviceOGL::DebugOutput()
if ( (start != 0 && g_frame_count >= start && g_frame_count < (start + length)) ) dump_me = true;
if ( dump_me ) {
if (m_state.rtv != NULL) m_state.rtv->Save(format("/tmp/out_f%d__d%d.bmp", g_frame_count, g_draw_count));
// Dump the current render target, if any. The file name suffix records
// whether the target is the backbuffer ("__back") or another texture ("__tex").
if (m_state.rtv != NULL) {
	if (m_state.rtv == m_backbuffer)
		m_state.rtv->Save(format("/tmp/out_f%d__d%d__back.bmp", g_frame_count, g_draw_count));
	else
		m_state.rtv->Save(format("/tmp/out_f%d__d%d__tex.bmp", g_frame_count, g_draw_count));
}
//if (m_state.dsv != NULL) m_state.dsv->Save(format("/tmp/ds_out_%d.bmp", g_draw_count));
fprintf(stderr, "\n");
@ -689,6 +697,18 @@ void GSDeviceOGL::CopyRect(GSTexture* st, GSTexture* dt, const GSVector4i& r)
// GL_NV_copy_image seem like the good extension but not supported on AMD...
// Maybe opengl 4.3 !
// FIXME check those function work as expected
// FIXME: this is an NVIDIA extension. Hopefully the latest AMD drivers support it too.
// An EXT version of the extension might be released later.
// Prototype, for reference:
// void CopyImageSubDataNV(
// uint srcName, enum srcTarget, int srcLevel, int srcX, int srcY, int srcZ,
// uint dstName, enum dstTarget, int dstLevel, int dstX, int dstY, int dstZ,
// sizei width, sizei height, sizei depth);
//
// Copies the r.width() x r.height() region located at (r.x, r.y) of level 0 of st
// into level 0 of dt at the same (r.x, r.y) offset, with a depth of 1.
// NOTE(review): the disabled D3D11 reference code in this function copies to
// destination offset (0, 0, 0) -- confirm which destination offset is intended.
// NOTE(review): no check that the extension entry point is available is visible
// here -- confirm it is validated elsewhere.
glCopyImageSubDataNV( static_cast<GSTextureOGL*>(st)->GetID(), GL_TEXTURE_2D,
0, r.x, r.y, 0,
static_cast<GSTextureOGL*>(dt)->GetID(), GL_TEXTURE_2D,
0, r.x, r.y, 0,
r.width(), r.height(), 1);
#if 0
// FIXME FBO
GLuint fbo_old = m_state.fbo;
@ -703,24 +723,8 @@ void GSDeviceOGL::CopyRect(GSTexture* st, GSTexture* dt, const GSVector4i& r)
glCopyTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 0, 0, dt->GetWidth(), dt->GetHeight());
OMSetFBO(fbo_old);
#if 0
// FIXME attach the texture to the FBO
GSTextureOGL* st_ogl = (GSTextureOGL*) st;
GSTextureOGL* dt_ogl = (GSTextureOGL*) dt;
dt_ogl->Attach(GL_COLOR_ATTACHMENT0);
st_ogl->Attach(GL_COLOR_ATTACHMENT1);
glReadBuffer(GL_COLOR_ATTACHMENT1);
// FIXME I'not sure how to select the destination
// const GLenum draw_buffer[1] = { GL_COLOR_ATTACHMENT0 };
// glDrawBuffers(draw_buffer);
dt_ogl->EnableUnit(0);
// FIXME need acess of target and it probably need to be same for both
//glCopyTexSubImage2D(dt_ogl.m_texture_target, 0, 0, 0, r.left, r.bottom, r.right-r.left, r.top-r.bottom);
// FIXME I'm not sure GL_TEXTURE_RECTANGLE is supported!!!
//glCopyTexSubImage2D(GL_TEXTURE_RECTANGLE, 0, 0, 0, r.left, r.bottom, r.right-r.left, r.top-r.bottom);
#endif
#if 0
D3D11_BOX box = {r.left, r.top, 0, r.right, r.bottom, 1};
m_ctx->CopySubresourceRegion(*(GSTexture11*)dt, 0, 0, 0, 0, *(GSTexture11*)st, 0, &box);
@ -790,6 +794,8 @@ void GSDeviceOGL::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt,
{GSVector4(left, top, 0.5f, 1.0f), GSVector2(flip_sr.x, flip_sr.w)},
{GSVector4(right, top, 0.5f, 1.0f), GSVector2(flip_sr.z, flip_sr.w)},
};
//fprintf(stderr, "A:%fx%f B:%fx%f\n", left, top, bottom, right);
//fprintf(stderr, "SR: %f %f %f %f\n", sr.x, sr.y, sr.z, sr.w);
IASetVertexState(m_vb_sr);
IASetVertexBuffer(vertices, 4);
@ -871,8 +877,6 @@ void GSDeviceOGL::DoInterlace(GSTexture* st, GSTexture* dt, int shader, bool lin
void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vertices, bool datm)
{
assert(0);
const GSVector2i& size = rt->GetSize();
if(GSTexture* t = CreateRenderTarget(size.x, size.y, rt->IsMSAA()))
@ -1009,7 +1013,7 @@ void GSDeviceOGL::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1)
{
PSSetShaderResource(0, sr0);
PSSetShaderResource(1, sr1);
PSSetShaderResource(2, NULL);
//PSSetShaderResource(2, NULL);
}
void GSDeviceOGL::PSSetShaderResource(int i, GSTexture* sr)
@ -1026,11 +1030,12 @@ void GSDeviceOGL::PSSetShaderResource(int i, GSTexture* sr)
void GSDeviceOGL::PSSetSamplerState(GLuint ss0, GLuint ss1, GLuint ss2)
{
if(m_state.ps_ss[0] != ss0 || m_state.ps_ss[1] != ss1 || m_state.ps_ss[2] != ss2)
if(m_state.ps_ss[0] != ss0 || m_state.ps_ss[1] != ss1)
//if(m_state.ps_ss[0] != ss0 || m_state.ps_ss[1] != ss1 || m_state.ps_ss[2] != ss2)
{
m_state.ps_ss[0] = ss0;
m_state.ps_ss[1] = ss1;
m_state.ps_ss[2] = ss2;
//m_state.ps_ss[2] = ss2;
m_ss_changed = true;
}
@ -1054,7 +1059,7 @@ void GSDeviceOGL::PSSetShader(GLuint ps)
// 4/ set the sampler state
// glBindSampler(1 , sampler);
if (m_srv_changed || m_ss_changed) {
for (uint i=0 ; i < 3; i++) {
for (uint i=0 ; i < 1; i++) {
if (m_state.ps_srv[i] != NULL) {
m_state.ps_srv[i]->EnableUnit(i);
glBindSampler(i, m_state.ps_ss[i]);
@ -1129,14 +1134,12 @@ void GSDeviceOGL::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVecto
static_cast<GSTextureOGL*>(ds)->Attach(GL_DEPTH_STENCIL_ATTACHMENT);
}
// Viewport -> glViewport
if(m_state.viewport != rt->GetSize())
{
m_state.viewport = rt->GetSize();
glViewport(0, 0, rt->GetWidth(), rt->GetHeight());
}
// Scissor -> glScissor (note must be enabled)
GSVector4i r = scissor ? *scissor : GSVector4i(rt->GetSize()).zwxy();
if(!m_state.scissor.eq(r))

View File

@ -801,20 +801,17 @@ class GSDeviceOGL : public GSDevice
GSBlendStateOGL* bs;
} m_convert;
struct
{
struct {
GLuint ps;
GSUniformBufferOGL *cb;
} m_fxaa;
struct
{
struct {
GSDepthStencilOGL* dss;
GSBlendStateOGL* bs;
} m_date;
struct
{
struct {
GSVertexBufferStateOGL* vb;
GLuint vs; // program
GSUniformBufferOGL* cb; // uniform current buffer
@ -841,26 +838,6 @@ class GSDeviceOGL : public GSDevice
bool m_srv_changed;
bool m_ss_changed;
#if 0
CComPtr<ID3D11Device> m_dev;
CComPtr<ID3D11DeviceContext> m_ctx;
CComPtr<IDXGISwapChain> m_swapchain;
CComPtr<ID3D11RasterizerState> m_rs;
// Shaders...
CComPtr<ID3D11SamplerState> m_palette_ss;
CComPtr<ID3D11SamplerState> m_rt_ss;
#endif
// hash_map<uint32, GSVertexShader11 > m_vs;
// hash_map<uint32, CComPtr<ID3D11GeometryShader> > m_gs;
// hash_map<uint32, CComPtr<ID3D11PixelShader> > m_ps;
// hash_map<uint32, CComPtr<ID3D11SamplerState> > m_ps_ss;
// hash_map<uint32, CComPtr<ID3D11DepthStencilState> > m_om_dss;
// hash_map<uint32, CComPtr<ID3D11BlendState> > m_om_bs;
hash_map<uint32, GLuint > m_vs;
hash_map<uint32, GLuint > m_gs;
hash_map<uint32, GLuint > m_ps;
@ -868,13 +845,9 @@ class GSDeviceOGL : public GSDevice
hash_map<uint32, GSDepthStencilOGL* > m_om_dss;
hash_map<uint32, GSBlendStateOGL* > m_om_bs;
//CComPtr<ID3D11SamplerState> m_palette_ss;
//CComPtr<ID3D11SamplerState> m_rt_ss;
GLuint m_palette_ss;
GLuint m_rt_ss;
//CComPtr<ID3D11Buffer> m_vs_cb;
//CComPtr<ID3D11Buffer> m_ps_cb;
GSUniformBufferOGL* m_vs_cb;
GSUniformBufferOGL* m_ps_cb;

View File

@ -24,9 +24,6 @@
GSRendererOGL::GSRendererOGL()
// FIXME
//: GSRendererHW<GSVertexHWOGL>(new GSTextureCacheOGL(this))
//: GSRendererHW<GSVertexHW11>(new GSTextureCacheOGL(this))
: GSRendererHW(new GSVertexTraceDX11(this), sizeof(GSVertexHW11), new GSTextureCacheOGL(this))
, m_topology(0)
{
@ -35,8 +32,6 @@ GSRendererOGL::GSRendererOGL()
UserHacks_AlphaHack = !!theApp.GetConfig("UserHacks_AlphaHack", 0);
m_pixelcenter = GSVector2(-0.5f, -0.5f);
// TODO must be implementer with macro InitVertexKick(GSRendererOGL)
// template<uint32 prim, uint32 tme, uint32 fst> void VertexKick(bool skip);
InitConvertVertex(GSRendererOGL);
}
@ -68,600 +63,6 @@ void GSRendererOGL::ConvertVertex(size_t dst_index, size_t src_index)
((GSVector4i*)d)[1] = v1;
}
#if 0
template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererOGL::VertexKick(bool skip)
{
GSVertexHW11& dst = m_vl.AddTail();
dst = *(GSVertexHW11*)&m_v;
#ifdef ENABLE_UPSCALE_HACKS
if(tme && fst)
{
//GSVector4::storel(&dst.ST, m_v.GetUV());
int Udiff = 0;
int Vdiff = 0;
int Uadjust = 0;
int Vadjust = 0;
int multiplier = GetUpscaleMultiplier();
if(multiplier > 1)
{
Udiff = m_v.UV.U & 4095;
Vdiff = m_v.UV.V & 4095;
if(Udiff != 0)
{
if (Udiff >= 4080) {/*printf("U+ %d %d\n", Udiff, m_v.UV.U);*/ Uadjust = -1; }
else if (Udiff <= 16) {/*printf("U- %d %d\n", Udiff, m_v.UV.U);*/ Uadjust = 1; }
}
if(Vdiff != 0)
{
if (Vdiff >= 4080) {/*printf("V+ %d %d\n", Vdiff, m_v.UV.V);*/ Vadjust = -1; }
else if (Vdiff <= 16) {/*printf("V- %d %d\n", Vdiff, m_v.UV.V);*/ Vadjust = 1; }
}
Udiff = m_v.UV.U & 255;
Vdiff = m_v.UV.V & 255;
if(Udiff != 0)
{
if (Udiff >= 248) { Uadjust = -1; }
else if (Udiff <= 8) { Uadjust = 1; }
}
if(Vdiff != 0)
{
if (Vdiff >= 248) { Vadjust = -1; }
else if (Vdiff <= 8) { Vadjust = 1; }
}
Udiff = m_v.UV.U & 15;
Vdiff = m_v.UV.V & 15;
if(Udiff != 0)
{
if (Udiff >= 15) { Uadjust = -1; }
else if (Udiff <= 1) { Uadjust = 1; }
}
if(Vdiff != 0)
{
if (Vdiff >= 15) { Vadjust = -1; }
else if (Vdiff <= 1) { Vadjust = 1; }
}
}
dst.ST.S = (float)m_v.UV.U - Uadjust;
dst.ST.T = (float)m_v.UV.V - Vadjust;
}
else if(tme)
{
// Wip :p
//dst.XYZ.X += 5;
//dst.XYZ.Y += 5;
}
#else
if(tme && fst)
{
GSVector4::storel(&dst.ST, m_v.GetUV());
}
#endif
int count = 0;
if(GSVertexHW11* v = DrawingKick<prim>(skip, count))
{
GSVector4i scissor = m_context->scissor.dx10;
GSVector4i pmin, pmax;
#if _M_SSE >= 0x401
GSVector4i v0, v1, v2;
switch(prim)
{
case GS_POINTLIST:
v0 = GSVector4i::load((int)v[0].p.xy).upl16();
pmin = v0;
pmax = v0;
break;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
v0 = GSVector4i::load((int)v[0].p.xy);
v1 = GSVector4i::load((int)v[1].p.xy);
pmin = v0.min_u16(v1).upl16();
pmax = v0.max_u16(v1).upl16();
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
v0 = GSVector4i::load((int)v[0].p.xy);
v1 = GSVector4i::load((int)v[1].p.xy);
v2 = GSVector4i::load((int)v[2].p.xy);
pmin = v0.min_u16(v1).min_u16(v2).upl16();
pmax = v0.max_u16(v1).max_u16(v2).upl16();
break;
}
#else
switch(prim)
{
case GS_POINTLIST:
pmin.x = v[0].p.x;
pmin.y = v[0].p.y;
pmax.x = v[0].p.x;
pmax.y = v[0].p.y;
break;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
pmin.x = std::min<uint16>(v[0].p.x, v[1].p.x);
pmin.y = std::min<uint16>(v[0].p.y, v[1].p.y);
pmax.x = std::max<uint16>(v[0].p.x, v[1].p.x);
pmax.y = std::max<uint16>(v[0].p.y, v[1].p.y);
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
pmin.x = std::min<uint16>(std::min<uint16>(v[0].p.x, v[1].p.x), v[2].p.x);
pmin.y = std::min<uint16>(std::min<uint16>(v[0].p.y, v[1].p.y), v[2].p.y);
pmax.x = std::max<uint16>(std::max<uint16>(v[0].p.x, v[1].p.x), v[2].p.x);
pmax.y = std::max<uint16>(std::max<uint16>(v[0].p.y, v[1].p.y), v[2].p.y);
break;
}
#endif
GSVector4i test = (pmax < scissor) | (pmin > scissor.zwxy());
switch(prim)
{
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
case GS_SPRITE:
test |= pmin == pmax;
break;
}
if(test.mask() & 0xff)
{
return;
}
m_count += count;
}
}
#endif
#if 0
{
switch(m_vt.m_primclass)
{
case GS_POINT_CLASS:
m_topology = GL_POINTS;
m_perfmon.Put(GSPerfMon::Prim, m_count);
break;
case GS_LINE_CLASS:
case GS_SPRITE_CLASS:
m_topology = GL_LINES;
m_perfmon.Put(GSPerfMon::Prim, m_count / 2);
break;
case GS_TRIANGLE_CLASS:
m_topology = GL_TRIANGLES;
m_perfmon.Put(GSPerfMon::Prim, m_count / 3);
break;
default:
__assume(0);
}
GSDrawingEnvironment& env = m_env;
GSDrawingContext* context = m_context;
const GSVector2i& rtsize = rt->GetSize();
const GSVector2& rtscale = rt->GetScale();
bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;
GSTexture *rtcopy = NULL;
ASSERT(m_dev != NULL);
GSDeviceOGL* dev = (GSDeviceOGL*)m_dev;
if(DATE)
{
if(dev->HasStencil())
{
GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y);
GSVector4 o = GSVector4(-1.0f, 1.0f);
GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
GSVector4 dst = src * 2.0f + o.xxxx();
GSVertexPT1 vertices[] =
{
{GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)},
{GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)},
{GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)},
{GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)},
};
dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM);
}
else
{
rtcopy = dev->CreateRenderTarget(rtsize.x, rtsize.y, false, rt->GetFormat());
// I'll use VertexTrace when I consider it more trustworthy
dev->CopyRect(rt, rtcopy, GSVector4i(rtsize).zwxy());
}
}
//
dev->BeginScene();
// om
GSDeviceOGL::OMDepthStencilSelector om_dssel;
if(context->TEST.ZTE)
{
om_dssel.ztst = context->TEST.ZTST;
om_dssel.zwe = !context->ZBUF.ZMSK;
}
else
{
om_dssel.ztst = ZTST_ALWAYS;
}
if(m_fba)
{
om_dssel.fba = context->FBA.FBA;
}
GSDeviceOGL::OMBlendSelector om_bsel;
if(!IsOpaque())
{
om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS;
om_bsel.a = context->ALPHA.A;
om_bsel.b = context->ALPHA.B;
om_bsel.c = context->ALPHA.C;
om_bsel.d = context->ALPHA.D;
if(env.PABE.PABE)
{
if(om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1)
{
// this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader
// cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result
om_bsel.abe = 0;
}
else
{
//Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though.
//ASSERT(0);
}
}
}
om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask();
// vs
GSDeviceOGL::VSSelector vs_sel;
vs_sel.tme = PRIM->TME;
vs_sel.fst = PRIM->FST;
vs_sel.logz = dev->HasDepth32() ? 0 : m_logz ? 1 : 0;
vs_sel.rtcopy = !!rtcopy;
// The real GS appears to do no masking based on the Z buffer format and writing larger Z values
// than the buffer supports seems to be an error condition on the real GS, causing it to crash.
// We are probably receiving bad coordinates from VU1 in these cases.
if(om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe)
{
if(context->ZBUF.PSM == PSM_PSMZ24)
{
if(m_vt.m_max.p.z > 0xffffff)
{
ASSERT(m_vt.m_min.p.z > 0xffffff);
// Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended.
if (m_vt.m_min.p.z > 0xffffff)
{
vs_sel.bppz = 1;
om_dssel.ztst = ZTST_ALWAYS;
}
}
}
else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S)
{
if(m_vt.m_max.p.z > 0xffff)
{
ASSERT(m_vt.m_min.p.z > 0xffff); // sfex capcom logo
// Fixme : Same as above, I guess.
if (m_vt.m_min.p.z > 0xffff)
{
vs_sel.bppz = 2;
om_dssel.ztst = ZTST_ALWAYS;
}
}
}
}
// FIXME Opengl support half pixel center (as dx10). Code could be easier!!!
GSDeviceOGL::VSConstantBuffer vs_cb;
float sx = 2.0f * rtscale.x / (rtsize.x << 4);
float sy = 2.0f * rtscale.y / (rtsize.y << 4);
float ox = (float)(int)context->XYOFFSET.OFX;
float oy = (float)(int)context->XYOFFSET.OFY;
float ox2 = 2.0f * m_pixelcenter.x / rtsize.x;
float oy2 = 2.0f * m_pixelcenter.y / rtsize.y;
//This hack subtracts around half a pixel from OFX and OFY. (Cannot do this directly,
//because DX10 and DX9 have a different pixel center.)
//
//The resulting shifted output aligns better with common blending / corona / blurring effects,
//but introduces a few bad pixels on the edges.
if(rt->LikelyOffset)
{
// DX9 has pixelcenter set to 0.0, so give it some value here
if(m_pixelcenter.x == 0 && m_pixelcenter.y == 0) { ox2 = -0.0003f; oy2 = -0.0003f; }
ox2 *= rt->OffsetHack_modx;
oy2 *= rt->OffsetHack_mody;
}
vs_cb.VertexScale = GSVector4(sx, -sy, ldexpf(1, -32), 0.0f);
vs_cb.VertexOffset = GSVector4(ox * sx + ox2 + 1, -(oy * sy + oy2 + 1), 0.0f, -1.0f);
// END of FIXME
// gs
GSDeviceOGL::GSSelector gs_sel;
gs_sel.iip = PRIM->IIP;
gs_sel.prim = m_vt.m_primclass;
// ps
GSDeviceOGL::PSSelector ps_sel;
GSDeviceOGL::PSSamplerSelector ps_ssel;
GSDeviceOGL::PSConstantBuffer ps_cb;
if(DATE)
{
if(dev->HasStencil())
{
om_dssel.date = 1;
}
else
{
ps_sel.date = 1 + context->TEST.DATM;
}
}
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
{
ps_sel.colclip = 1;
}
ps_sel.clr1 = om_bsel.IsCLR1();
ps_sel.fba = context->FBA.FBA;
ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0;
if(UserHacks_AlphaHack) ps_sel.aout = 1;
if(PRIM->FGE)
{
ps_sel.fog = 1;
ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255;
}
if(context->TEST.ATE)
{
ps_sel.atst = context->TEST.ATST;
switch(ps_sel.atst)
{
case ATST_LESS:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1);
break;
case ATST_GREATER:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1);
break;
default:
ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF;
break;
}
}
else
{
ps_sel.atst = ATST_ALWAYS;
}
if(tex)
{
ps_sel.wms = context->CLAMP.WMS;
ps_sel.wmt = context->CLAMP.WMT;
ps_sel.fmt = tex->m_fmt;
ps_sel.aem = env.TEXA.AEM;
ps_sel.tfx = context->TEX0.TFX;
ps_sel.tcc = context->TEX0.TCC;
ps_sel.ltf = m_filter == 2 ? m_vt.IsLinear() : m_filter;
ps_sel.rt = tex->m_target;
int w = tex->m_texture->GetWidth();
int h = tex->m_texture->GetHeight();
int tw = (int)(1 << context->TEX0.TW);
int th = (int)(1 << context->TEX0.TH);
GSVector4 WH(tw, th, w, h);
if(PRIM->FST)
{
vs_cb.TextureScale = GSVector4(1.0f / 16) / WH.xyxy();
//Maybe better?
//vs_cb.TextureScale = GSVector4(1.0f / 16) * GSVector4(tex->m_texture->GetScale()).xyxy() / WH.zwzw();
ps_sel.fst = 1;
}
ps_cb.WH = WH;
ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw();
ps_cb.MskFix = GSVector4i(context->CLAMP.MINU, context->CLAMP.MINV, context->CLAMP.MAXU, context->CLAMP.MAXV);
GSVector4 clamp(ps_cb.MskFix);
GSVector4 ta(env.TEXA & GSVector4i::x000000ff());
ps_cb.MinMax = clamp / WH.xyxy();
ps_cb.MinF_TA = (clamp + 0.5f).xyxy(ta) / WH.xyxy(GSVector4(255, 255));
ps_ssel.tau = (context->CLAMP.WMS + 3) >> 1;
ps_ssel.tav = (context->CLAMP.WMT + 3) >> 1;
ps_ssel.ltf = ps_sel.ltf;
}
else
{
ps_sel.tfx = 4;
}
// rs
GSVector4i scissor = GSVector4i(GSVector4(rtscale).xyxy() * context->scissor.in).rintersect(GSVector4i(rtsize).zwxy());
dev->OMSetRenderTargets(rt, ds, &scissor);
dev->PSSetShaderResource(0, tex ? tex->m_texture : 0);
dev->PSSetShaderResource(1, tex ? tex->m_palette : 0);
dev->PSSetShaderResource(2, rtcopy);
uint8 afix = context->ALPHA.FIX;
dev->SetupOM(om_dssel, om_bsel, afix);
dev->SetupIA(m_vertices, m_count, m_topology);
dev->SetupVS(vs_sel, &vs_cb);
dev->SetupGS(gs_sel);
dev->SetupPS(ps_sel, &ps_cb, ps_ssel);
// draw
if(context->TEST.DoFirstPass())
{
dev->DrawPrimitive();
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
{
GSDeviceOGL::OMBlendSelector om_bselneg(om_bsel);
GSDeviceOGL::PSSelector ps_selneg(ps_sel);
om_bselneg.negative = 1;
ps_selneg.colclip = 2;
dev->SetupOM(om_dssel, om_bselneg, afix);
dev->SetupPS(ps_selneg, &ps_cb, ps_ssel);
dev->DrawPrimitive();
}
}
if(context->TEST.DoSecondPass())
{
ASSERT(!env.PABE.PABE);
static const uint32 iatst[] = {1, 0, 5, 6, 7, 2, 3, 4};
ps_sel.atst = iatst[ps_sel.atst];
switch(ps_sel.atst)
{
case ATST_LESS:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1);
break;
case ATST_GREATER:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1);
break;
default:
ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF;
break;
}
dev->SetupPS(ps_sel, &ps_cb, ps_ssel);
bool z = om_dssel.zwe;
bool r = om_bsel.wr;
bool g = om_bsel.wg;
bool b = om_bsel.wb;
bool a = om_bsel.wa;
switch(context->TEST.AFAIL)
{
case 0: z = r = g = b = a = false; break; // none
case 1: z = false; break; // rgba
case 2: r = g = b = a = false; break; // z
case 3: z = a = false; break; // rgb
default: __assume(0);
}
if(z || r || g || b || a)
{
om_dssel.zwe = z;
om_bsel.wr = r;
om_bsel.wg = g;
om_bsel.wb = b;
om_bsel.wa = a;
dev->SetupOM(om_dssel, om_bsel, afix);
dev->DrawPrimitive();
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
{
GSDeviceOGL::OMBlendSelector om_bselneg(om_bsel);
GSDeviceOGL::PSSelector ps_selneg(ps_sel);
om_bselneg.negative = 1;
ps_selneg.colclip = 2;
dev->SetupOM(om_dssel, om_bselneg, afix);
dev->SetupPS(ps_selneg, &ps_cb, ps_ssel);
dev->DrawPrimitive();
}
}
}
dev->EndScene();
dev->Recycle(rtcopy);
if(om_dssel.fba) UpdateFBA(rt);
}
#endif
void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
{
switch(m_vt->m_primclass)
@ -707,11 +108,21 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
GSVertexPT1 vertices[] =
{
#if 0
{GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)},
{GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)},
{GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)},
{GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)},
#else
{GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.y)},
{GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.y)},
{GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.w)},
{GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.w)},
#endif
};
//fprintf(stderr, "DATE A:%fx%f B:%fx%f\n", dst.x, -dst.y, dst.z, -dst.w);
//fprintf(stderr, "DATE SR: %f %f %f %f\n", src.x, src.y, src.z, src.w);
//fprintf(stderr, "DATE offset: %f\n", o.x);
dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM);
}

View File

@ -93,7 +93,7 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, bool msaa, int format, GLuint
// Allocate the buffer
switch (m_type) {
case GSTexture::DepthStencil:
EnableUnit(1);
EnableUnit(2);
glTexImage2D(m_texture_target, 0, m_format, m_size.x, m_size.y, 0, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, NULL);
break;
case GSTexture::RenderTarget:
@ -102,7 +102,7 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, bool msaa, int format, GLuint
// Howto allocate the texture unit !!!
// In worst case the HW renderer seems to use 3 texture unit
// For the moment SW renderer only use 1 so don't bother
EnableUnit(0);
EnableUnit(2);
if (m_format == GL_RGBA8) {
glTexImage2D(m_texture_target, 0, m_format, m_size.x, m_size.y, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
}
@ -140,7 +140,7 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch)
// FIXME warning order of the y axis
// FIXME I'm not confident with GL_UNSIGNED_BYTE type
EnableUnit(0);
EnableUnit(2);
if (m_format != GL_RGBA8) {
fprintf(stderr, "wrong pixel format\n");
@ -368,12 +368,12 @@ bool GSTextureOGL::Save(const string& fn, bool dds)
glReadBuffer(GL_BACK);
glReadPixels(0, 0, m_size.x, m_size.y, GL_RGBA, GL_UNSIGNED_BYTE, image);
} else if(IsDss()) {
EnableUnit(1);
EnableUnit(2);
glGetTexImage(m_texture_target, 0, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, image);
} else {
glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read);
EnableUnit(1);
EnableUnit(2);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, m_texture_target, m_texture_id, 0);
glReadBuffer(GL_COLOR_ATTACHMENT1);

View File

@ -47,4 +47,6 @@ class GSTextureOGL : public GSTexture
bool IsBackbuffer() { return (m_type == GSTexture::Backbuffer); }
bool IsDss() { return (m_type == GSTexture::DepthStencil); }
// Accessor for m_texture_id, the texture identifier this object passes to GL calls.
GLuint GetID()
{
	return m_texture_id;
}
};

View File

@ -141,62 +141,27 @@ void ps_main6() // diagonal
SV_Target0 = c;
}
// Avoid to log useless error compilation failure
// Alpha-threshold pass: fragments whose sampled alpha is below 128/255 are
// discarded; every surviving fragment writes all zeros to SV_Target0.
// NOTE(review): presumably one of the DATE setup shaders and sample_c() presumably
// samples the bound source texture -- confirm against the rest of the shader file.
void ps_main2()
{
if((sample_c().a - 128.0f / 255) < 0) // >= 0x80 pass
discard;
SV_Target0 = vec4(0.0f, 0.0f, 0.0f, 0.0f);
}
// Inverse alpha-threshold pass: fragments whose sampled alpha is above 127.95/255
// are discarded; every surviving fragment writes all zeros to SV_Target0.
// The 127.95 constant (rather than 128) makes an alpha of exactly 0x80 fail.
// NOTE(review): presumably the counterpart of ps_main2 for the other DATM value -- confirm.
void ps_main3()
{
if((127.95f / 255 - sample_c().a) <0) // < 0x80 pass (== 0x80 should not pass)
discard;
SV_Target0 = vec4(0.0f, 0.0f, 0.0f, 0.0f);
}
// Wraps each sampled channel: scales it to the 0..255 range, adds 0.5, takes the
// result modulo 256 and scales back by 1/255 before writing it to SV_Target0.
void ps_main4()
{
// FIXME: GLSL mod() and HLSL fmod() give different results when the value is negative.
// Original HLSL: output.c = fmod(sample_c(input.t) * 255 + 0.5f, 256) / 255;
vec4 c = mod(sample_c() * 255 + 0.5f, 256) / 255;
SV_Target0 = c;
}
// Texture2D Texture;
// SamplerState TextureSampler;
//
// uint ps_main1(PS_INPUT input) : SV_Target0
// {
// float4 c = sample_c(input.t);
//
// c.a *= 256.0f / 127; // hm, 0.5 won't give us 1.0 if we just multiply with 2
//
// uint4 i = c * float4(0x001f, 0x03e0, 0x7c00, 0x8000);
//
// return (i.x & 0x001f) | (i.y & 0x03e0) | (i.z & 0x7c00) | (i.w & 0x8000);
// }
//
// PS_OUTPUT ps_main2(PS_INPUT input)
// {
// PS_OUTPUT output;
//
// clip(sample_c(input.t).a - 128.0f / 255); // >= 0x80 pass
//
// output.c = 0;
//
// return output;
// }
//
// PS_OUTPUT ps_main3(PS_INPUT input)
// {
// PS_OUTPUT output;
//
// clip(127.95f / 255 - sample_c(input.t).a); // < 0x80 pass (== 0x80 should not pass)
//
// output.c = 0;
//
// return output;
// }
//
// PS_OUTPUT ps_main4(PS_INPUT input)
// {
// PS_OUTPUT output;
//
// output.c = fmod(sample_c(input.t) * 255 + 0.5f, 256) / 255;
//
// return output;
// }
//
#endif

View File

@ -637,9 +637,9 @@ void ps_main()
if(c.a < 0.5) c.a += 0.5;
}
SV_Target1 = c;
#ifndef DISABLE_DUAL_BLEND
SV_Target0 = vec4(alpha, alpha, alpha, alpha);
#endif
SV_Target1 = c;
}
#endif