diff --git a/plugins/GSdx/CMakeLists.txt b/plugins/GSdx/CMakeLists.txt index 0cbc9f0131..fb428260a6 100644 --- a/plugins/GSdx/CMakeLists.txt +++ b/plugins/GSdx/CMakeLists.txt @@ -83,6 +83,7 @@ set(GSdxSources GSPerfMon.cpp GSRasterizer.cpp GSRenderer.cpp + GSRendererHW.cpp GSRendererNull.cpp GSRendererOGL.cpp GSRendererSW.cpp diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index 8131d591bc..fe7756d796 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -490,17 +490,13 @@ void GSDeviceOGL::Flip() #endif } -void GSDeviceOGL::DrawPrimitive() +void GSDeviceOGL::DebugInput() { -#ifdef OGL_DEBUG bool dump_me = false; uint32 start = theApp.GetConfig("debug_ogl_dump", 0); uint32 length = theApp.GetConfig("debug_ogl_dump_length", 5); if ( (start != 0 && g_frame_count >= start && g_frame_count < (start + length)) ) dump_me = true; -#endif - - // DUMP INPUT -#ifdef OGL_DEBUG + if ( dump_me ) { for (auto i = 0 ; i < 3 ; i++) { if (m_state.ps_srv[i] != NULL) { @@ -516,12 +512,15 @@ void GSDeviceOGL::DrawPrimitive() m_state.bs->debug(); m_state.dss->debug(); } -#endif +} - m_state.vb->draw_arrays(); +void GSDeviceOGL::DebugOutput() +{ + bool dump_me = false; + uint32 start = theApp.GetConfig("debug_ogl_dump", 0); + uint32 length = theApp.GetConfig("debug_ogl_dump_length", 5); + if ( (start != 0 && g_frame_count >= start && g_frame_count < (start + length)) ) dump_me = true; - // DUMP OUTPUT -#ifdef OGL_DEBUG if ( dump_me ) { if (m_state.rtv != NULL) m_state.rtv->Save(format("/tmp/out_f%d__d%d.bmp", g_frame_count, g_draw_count)); //if (m_state.dsv != NULL) m_state.dsv->Save(format("/tmp/ds_out_%d.bmp", g_draw_count)); @@ -529,7 +528,32 @@ void GSDeviceOGL::DrawPrimitive() fprintf(stderr, "\n"); } +} +void GSDeviceOGL::DrawPrimitive() +{ +#ifdef OGL_DEBUG + DebugInput(); +#endif + + m_state.vb->DrawPrimitive(); + +#ifdef OGL_DEBUG + DebugOutput(); + g_draw_count++; +#endif +} + +void GSDeviceOGL::DrawIndexedPrimitive() +{ 
+#ifdef OGL_DEBUG + DebugInput(); +#endif + + m_state.vb->DrawIndexedPrimitive(); + +#ifdef OGL_DEBUG + DebugOutput(); g_draw_count++; #endif } @@ -929,7 +953,7 @@ GSTexture* GSDeviceOGL::Resolve(GSTexture* t) void GSDeviceOGL::EndScene() { - m_state.vb->draw_done(); + m_state.vb->EndScene(); } void GSDeviceOGL::SetUniformBuffer(GSUniformBufferOGL* cb) @@ -950,7 +974,12 @@ void GSDeviceOGL::IASetVertexState(GSVertexBufferStateOGL* vb) void GSDeviceOGL::IASetVertexBuffer(const void* vertices, size_t count) { - m_state.vb->upload(vertices, count); + m_state.vb->UploadVB(vertices, count); +} + +void GSDeviceOGL::IASetIndexBuffer(const void* index, size_t count) +{ + m_state.vb->UploadIB(index, count); } void GSDeviceOGL::IASetPrimitiveTopology(GLenum topology) diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index cd10698802..474eb37aec 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -283,41 +283,135 @@ struct GSInputLayoutOGL { }; class GSVertexBufferStateOGL { - size_t m_stride; - size_t m_start; - size_t m_count; - size_t m_limit; - GLuint m_vb; - GLuint m_va; - const GLenum m_target; - GLenum m_topology; + class GSBufferOGL { + size_t m_stride; + size_t m_start; + size_t m_count; + size_t m_limit; + GLenum m_target; + GLuint m_buffer; + size_t m_default_size; - void allocate(size_t new_limit) - { - m_start = 0; - m_limit = new_limit; - glBufferData(m_target, m_limit * m_stride, NULL, GL_STREAM_DRAW); - } + public: + GSBufferOGL(GLenum target, size_t stride) : + m_stride(stride) + , m_start(0) + , m_count(0) + , m_limit(0) + , m_target(target) + { + glGenBuffers(1, &m_buffer); + // Opengl works best with 1-4MB buffer. 
+ m_default_size = 2 * 1024 * 1024 / m_stride; + } + + ~GSBufferOGL() { glDeleteBuffers(1, &m_buffer); } + + void allocate() { allocate(m_default_size); } + + void allocate(size_t new_limit) + { + m_start = 0; + m_limit = new_limit; + glBufferData(m_target, m_limit * m_stride, NULL, GL_STREAM_DRAW); // use this buffer's own binding point (vertex or index), not always GL_ARRAY_BUFFER + } + + void bind() + { + glBindBuffer(m_target, m_buffer); + } + + void upload(const void* src, uint32 count) + { +#ifdef OGL_DEBUG + GLint b_size = -1; + glGetBufferParameteriv(m_target, GL_BUFFER_SIZE, &b_size); + + if (b_size <= 0) return; +#endif + + m_count = count; + + // Note: For an explanation of the map flag + // see http://www.opengl.org/wiki/Buffer_Object_Streaming + uint32 map_flags = GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT; + + // Current GPU buffer is really too small need to allocate a new one + if (m_count > m_limit) { + allocate(std::max(static_cast<size_t>(count) * 3 / 2, m_default_size)); + + } else if (m_count > (m_limit - m_start) ) { + // Not enough left free room. Just go back at the beginning + m_start = 0; + + // Tell the driver that it can orphan previous buffer and restart from a scratch buffer. + // Technically the buffer will not be accessible by the application anymore but the + // GL will effectively remove it when draw calls are finished. 
+ map_flags |= GL_MAP_INVALIDATE_BUFFER_BIT; + } else { + // Tell the driver that it doesn't need to contain any valid buffer data, and that you promise to write the entire range you map + map_flags |= GL_MAP_INVALIDATE_RANGE_BIT; + } + + // Upload the data to the buffer + uint8* dst = (uint8*) glMapBufferRange(m_target, m_stride*m_start, m_stride*m_count, map_flags); +#ifdef OGL_DEBUG + if (dst == NULL) { + fprintf(stderr, "CRITICAL ERROR map failed for vb!!!\n"); + return; + } +#endif + memcpy(dst, src, m_stride*m_count); + glUnmapBuffer(m_target); + } + + void EndScene() + { + m_start += m_count; + m_count = 0; + } + + void Draw(GLenum mode) + { + glDrawArrays(mode, m_start, m_count); + } + + void Draw(GLenum mode, GLint basevertex) + { + glDrawElementsBaseVertex(mode, m_count, GL_UNSIGNED_INT, (void*)(m_start * m_stride), basevertex); + } + + size_t GetStart() { return m_start; } + + } *m_vb, *m_ib; + + GLuint m_va; + GLenum m_topology; public: GSVertexBufferStateOGL(size_t stride, GSInputLayoutOGL* layout, uint32 layout_nbr) - : m_stride(stride) - , m_count(0) - , m_target(GL_ARRAY_BUFFER) { - glGenBuffers(1, &m_vb); glGenVertexArrays(1, &m_va); + + m_vb = new GSBufferOGL(GL_ARRAY_BUFFER, stride); + m_ib = new GSBufferOGL(GL_ELEMENT_ARRAY_BUFFER, sizeof(uint32)); + bind(); - allocate(60000); // Opengl works best with 1-4MB buffer. 60k element seems a good value. Note stride is 32 + // Note: index array are part of the VA state so it need to be bind only once. 
+ m_ib->bind(); + + m_vb->allocate(); + m_ib->allocate(); set_internal_format(layout, layout_nbr); } void bind() { glBindVertexArray(m_va); - glBindBuffer(m_target, m_vb); + m_vb->bind(); } +#if 0 void upload(const void* src, uint32 count) { #ifdef OGL_DEBUG @@ -361,6 +455,7 @@ public: memcpy(dst, src, m_stride*m_count); glUnmapBuffer(m_target); } +#endif void set_internal_format(GSInputLayoutOGL* layout, uint32 layout_nbr) { @@ -380,44 +475,55 @@ public: } } - void draw_arrays() + void EndScene() { - glDrawArrays(m_topology, m_start, m_count); + m_vb->EndScene(); + m_ib->EndScene(); } - void draw_done() - { - m_start += m_count; - m_count = 0; - } + void DrawPrimitive() { m_vb->Draw(m_topology); } + + void DrawIndexedPrimitive() { m_ib->Draw(m_topology, m_vb->GetStart() ); } void SetTopology(GLenum topology) { m_topology = topology; } + void UploadVB(const void* vertices, size_t count) + { + m_vb->upload(vertices, count); + } + + void UploadIB(const void* index, size_t count) + { + m_ib->upload(index, count); + } + ~GSVertexBufferStateOGL() { - glDeleteBuffers(1, &m_vb); glDeleteVertexArrays(1, &m_va); + // m_vb / m_ib are heap-allocated in the constructor; ~GSBufferOGL calls glDeleteBuffers on the GL buffer. + delete m_vb; + delete m_ib; } void debug() { - uint32 element; + uint32 element = 0; string topo; switch (m_topology) { case GL_POINTS: - element = m_count; + //element = m_count; topo = "point"; break; case GL_LINES: - element = m_count/2; + //element = m_count/2; topo = "line"; break; case GL_TRIANGLES: - element = m_count/3; + //element = m_count/3; topo = "triangle"; break; case GL_TRIANGLE_STRIP: - element = m_count - 2; + //element = m_count - 2; topo = "triangle strip"; break; } @@ -788,6 +894,8 @@ class GSDeviceOGL : public GSDevice void CheckDebugLog(); static void DebugOutputToFile(unsigned int source, unsigned int type, unsigned int id, unsigned int severity, const char* message); + void DebugOutput(); + void DebugInput(); bool HasStencil() { return true; } bool HasDepth32() { return true; } @@ -797,6 +905,7 @@ class GSDeviceOGL : public GSDevice void Flip(); void DrawPrimitive(); + 
void DrawIndexedPrimitive(); void ClearRenderTarget(GSTexture* t, const GSVector4& c); void ClearRenderTarget(GSTexture* t, uint32 c); @@ -825,6 +934,7 @@ class GSDeviceOGL : public GSDevice void IASetPrimitiveTopology(GLenum topology); void IASetVertexBuffer(const void* vertices, size_t count); + void IASetIndexBuffer(const void* index, size_t count); void IASetVertexState(GSVertexBufferStateOGL* vb); void SetUniformBuffer(GSUniformBufferOGL* cb); @@ -844,7 +954,7 @@ class GSDeviceOGL : public GSDevice void CreateTextureFX(); - void SetupIA(const void* vertices, int count, GLenum prim); + void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim); void SetupVS(VSSelector sel, const VSConstantBuffer* cb); void SetupGS(GSSelector sel); void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel); diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index e7d982b54d..5686d8555d 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -26,7 +26,8 @@ GSRendererOGL::GSRendererOGL() // FIXME //: GSRendererHW(new GSTextureCacheOGL(this)) - : GSRendererHW(new GSTextureCacheOGL(this)) + //: GSRendererHW(new GSTextureCacheOGL(this)) + : GSRendererHW(new GSVertexTraceDX11(this), sizeof(GSVertexHW11), new GSTextureCacheOGL(this)) , m_topology(0) { m_logz = !!theApp.GetConfig("logz", 0); @@ -36,7 +37,7 @@ GSRendererOGL::GSRendererOGL() // TODO must be implementer with macro InitVertexKick(GSRendererOGL) // template void VertexKick(bool skip); - InitVertexKick(GSRendererOGL); + InitConvertVertex(GSRendererOGL); } bool GSRendererOGL::CreateDevice(GSDevice* dev) @@ -47,6 +48,27 @@ bool GSRendererOGL::CreateDevice(GSDevice* dev) return true; } +template +void GSRendererOGL::ConvertVertex(size_t dst_index, size_t src_index) +{ + GSVertex* s = (GSVertex*)((GSVertexHW11*)m_vertex.buff + src_index); + GSVertexHW11* d = (GSVertexHW11*)m_vertex.buff + dst_index; + + GSVector4i 
v0 = ((GSVector4i*)s)[0]; + GSVector4i v1 = ((GSVector4i*)s)[1]; + + if(tme && fst) + { + // TODO: modify VertexTrace and the shaders to read uv from v1.u16[0], v1.u16[1], then this step is not needed + + v0 = GSVector4i::cast(GSVector4(v1.uph16()).xyzw(GSVector4::cast(v0))); // uv => st + } + + ((GSVector4i*)d)[0] = v0; + ((GSVector4i*)d)[1] = v1; +} + +#if 0 template void GSRendererOGL::VertexKick(bool skip) { @@ -222,3 +244,832 @@ void GSRendererOGL::VertexKick(bool skip) m_count += count; } } +#endif + +#if 0 + { + + switch(m_vt.m_primclass) + { + case GS_POINT_CLASS: + m_topology = GL_POINTS; + m_perfmon.Put(GSPerfMon::Prim, m_count); + break; + case GS_LINE_CLASS: + case GS_SPRITE_CLASS: + m_topology = GL_LINES; + m_perfmon.Put(GSPerfMon::Prim, m_count / 2); + break; + case GS_TRIANGLE_CLASS: + m_topology = GL_TRIANGLES; + m_perfmon.Put(GSPerfMon::Prim, m_count / 3); + break; + default: + __assume(0); + } + + + GSDrawingEnvironment& env = m_env; + GSDrawingContext* context = m_context; + + const GSVector2i& rtsize = rt->GetSize(); + const GSVector2& rtscale = rt->GetScale(); + + bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24; + + GSTexture *rtcopy = NULL; + + ASSERT(m_dev != NULL); + + GSDeviceOGL* dev = (GSDeviceOGL*)m_dev; + + if(DATE) + { + if(dev->HasStencil()) + { + GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y); + GSVector4 o = GSVector4(-1.0f, 1.0f); + + GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy()); + GSVector4 dst = src * 2.0f + o.xxxx(); + + GSVertexPT1 vertices[] = + { + {GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)}, + {GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)}, + {GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)}, + {GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)}, + }; + + dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM); + } + else + { + rtcopy = dev->CreateRenderTarget(rtsize.x, rtsize.y, 
false, rt->GetFormat()); + + // I'll use VertexTrace when I consider it more trustworthy + + dev->CopyRect(rt, rtcopy, GSVector4i(rtsize).zwxy()); + } + } + + // + + dev->BeginScene(); + + // om + + GSDeviceOGL::OMDepthStencilSelector om_dssel; + + if(context->TEST.ZTE) + { + om_dssel.ztst = context->TEST.ZTST; + om_dssel.zwe = !context->ZBUF.ZMSK; + } + else + { + om_dssel.ztst = ZTST_ALWAYS; + } + + if(m_fba) + { + om_dssel.fba = context->FBA.FBA; + } + + GSDeviceOGL::OMBlendSelector om_bsel; + + if(!IsOpaque()) + { + om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS; + + om_bsel.a = context->ALPHA.A; + om_bsel.b = context->ALPHA.B; + om_bsel.c = context->ALPHA.C; + om_bsel.d = context->ALPHA.D; + + if(env.PABE.PABE) + { + if(om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1) + { + // this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader + // cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result + + om_bsel.abe = 0; + } + else + { + //Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though. + //ASSERT(0); + } + } + } + + om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask(); + + // vs + + GSDeviceOGL::VSSelector vs_sel; + + vs_sel.tme = PRIM->TME; + vs_sel.fst = PRIM->FST; + vs_sel.logz = dev->HasDepth32() ? 0 : m_logz ? 1 : 0; + vs_sel.rtcopy = !!rtcopy; + + // The real GS appears to do no masking based on the Z buffer format and writing larger Z values + // than the buffer supports seems to be an error condition on the real GS, causing it to crash. + // We are probably receiving bad coordinates from VU1 in these cases. 
+ + if(om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe) + { + if(context->ZBUF.PSM == PSM_PSMZ24) + { + if(m_vt.m_max.p.z > 0xffffff) + { + ASSERT(m_vt.m_min.p.z > 0xffffff); + // Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended. + if (m_vt.m_min.p.z > 0xffffff) + { + vs_sel.bppz = 1; + om_dssel.ztst = ZTST_ALWAYS; + } + } + } + else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S) + { + if(m_vt.m_max.p.z > 0xffff) + { + ASSERT(m_vt.m_min.p.z > 0xffff); // sfex capcom logo + // Fixme : Same as above, I guess. + if (m_vt.m_min.p.z > 0xffff) + { + vs_sel.bppz = 2; + om_dssel.ztst = ZTST_ALWAYS; + } + } + } + } + + // FIXME Opengl support half pixel center (as dx10). Code could be easier!!! + GSDeviceOGL::VSConstantBuffer vs_cb; + + float sx = 2.0f * rtscale.x / (rtsize.x << 4); + float sy = 2.0f * rtscale.y / (rtsize.y << 4); + float ox = (float)(int)context->XYOFFSET.OFX; + float oy = (float)(int)context->XYOFFSET.OFY; + float ox2 = 2.0f * m_pixelcenter.x / rtsize.x; + float oy2 = 2.0f * m_pixelcenter.y / rtsize.y; + + //This hack subtracts around half a pixel from OFX and OFY. (Cannot do this directly, + //because DX10 and DX9 have a different pixel center.) + // + //The resulting shifted output aligns better with common blending / corona / blurring effects, + //but introduces a few bad pixels on the edges. 
+ + if(rt->LikelyOffset) + { + // DX9 has pixelcenter set to 0.0, so give it some value here + + if(m_pixelcenter.x == 0 && m_pixelcenter.y == 0) { ox2 = -0.0003f; oy2 = -0.0003f; } + + ox2 *= rt->OffsetHack_modx; + oy2 *= rt->OffsetHack_mody; + } + + vs_cb.VertexScale = GSVector4(sx, -sy, ldexpf(1, -32), 0.0f); + vs_cb.VertexOffset = GSVector4(ox * sx + ox2 + 1, -(oy * sy + oy2 + 1), 0.0f, -1.0f); + // END of FIXME + + // gs + + GSDeviceOGL::GSSelector gs_sel; + + gs_sel.iip = PRIM->IIP; + gs_sel.prim = m_vt.m_primclass; + + // ps + + GSDeviceOGL::PSSelector ps_sel; + GSDeviceOGL::PSSamplerSelector ps_ssel; + GSDeviceOGL::PSConstantBuffer ps_cb; + + if(DATE) + { + if(dev->HasStencil()) + { + om_dssel.date = 1; + } + else + { + ps_sel.date = 1 + context->TEST.DATM; + } + } + + if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST) + { + ps_sel.colclip = 1; + } + + ps_sel.clr1 = om_bsel.IsCLR1(); + ps_sel.fba = context->FBA.FBA; + ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0; + + if(UserHacks_AlphaHack) ps_sel.aout = 1; + + if(PRIM->FGE) + { + ps_sel.fog = 1; + + ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255; + } + + if(context->TEST.ATE) + { + ps_sel.atst = context->TEST.ATST; + + switch(ps_sel.atst) + { + case ATST_LESS: + ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1); + break; + case ATST_GREATER: + ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1); + break; + default: + ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF; + break; + } + } + else + { + ps_sel.atst = ATST_ALWAYS; + } + + if(tex) + { + ps_sel.wms = context->CLAMP.WMS; + ps_sel.wmt = context->CLAMP.WMT; + ps_sel.fmt = tex->m_fmt; + ps_sel.aem = env.TEXA.AEM; + ps_sel.tfx = context->TEX0.TFX; + ps_sel.tcc = context->TEX0.TCC; + ps_sel.ltf = m_filter == 2 ? 
m_vt.IsLinear() : m_filter; + ps_sel.rt = tex->m_target; + + int w = tex->m_texture->GetWidth(); + int h = tex->m_texture->GetHeight(); + + int tw = (int)(1 << context->TEX0.TW); + int th = (int)(1 << context->TEX0.TH); + + GSVector4 WH(tw, th, w, h); + + if(PRIM->FST) + { + vs_cb.TextureScale = GSVector4(1.0f / 16) / WH.xyxy(); + //Maybe better? + //vs_cb.TextureScale = GSVector4(1.0f / 16) * GSVector4(tex->m_texture->GetScale()).xyxy() / WH.zwzw(); + ps_sel.fst = 1; + } + + ps_cb.WH = WH; + ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw(); + ps_cb.MskFix = GSVector4i(context->CLAMP.MINU, context->CLAMP.MINV, context->CLAMP.MAXU, context->CLAMP.MAXV); + + GSVector4 clamp(ps_cb.MskFix); + GSVector4 ta(env.TEXA & GSVector4i::x000000ff()); + + ps_cb.MinMax = clamp / WH.xyxy(); + ps_cb.MinF_TA = (clamp + 0.5f).xyxy(ta) / WH.xyxy(GSVector4(255, 255)); + + ps_ssel.tau = (context->CLAMP.WMS + 3) >> 1; + ps_ssel.tav = (context->CLAMP.WMT + 3) >> 1; + ps_ssel.ltf = ps_sel.ltf; + } + else + { + ps_sel.tfx = 4; + } + + // rs + + GSVector4i scissor = GSVector4i(GSVector4(rtscale).xyxy() * context->scissor.in).rintersect(GSVector4i(rtsize).zwxy()); + + dev->OMSetRenderTargets(rt, ds, &scissor); + dev->PSSetShaderResource(0, tex ? tex->m_texture : 0); + dev->PSSetShaderResource(1, tex ? 
tex->m_palette : 0); + dev->PSSetShaderResource(2, rtcopy); + + uint8 afix = context->ALPHA.FIX; + + dev->SetupOM(om_dssel, om_bsel, afix); + dev->SetupIA(m_vertices, m_count, m_topology); + dev->SetupVS(vs_sel, &vs_cb); + dev->SetupGS(gs_sel); + dev->SetupPS(ps_sel, &ps_cb, ps_ssel); + + // draw + + if(context->TEST.DoFirstPass()) + { + dev->DrawPrimitive(); + + if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST) + { + GSDeviceOGL::OMBlendSelector om_bselneg(om_bsel); + GSDeviceOGL::PSSelector ps_selneg(ps_sel); + + om_bselneg.negative = 1; + ps_selneg.colclip = 2; + + dev->SetupOM(om_dssel, om_bselneg, afix); + dev->SetupPS(ps_selneg, &ps_cb, ps_ssel); + + dev->DrawPrimitive(); + } + } + + if(context->TEST.DoSecondPass()) + { + ASSERT(!env.PABE.PABE); + + static const uint32 iatst[] = {1, 0, 5, 6, 7, 2, 3, 4}; + + ps_sel.atst = iatst[ps_sel.atst]; + + switch(ps_sel.atst) + { + case ATST_LESS: + ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1); + break; + case ATST_GREATER: + ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1); + break; + default: + ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF; + break; + } + + dev->SetupPS(ps_sel, &ps_cb, ps_ssel); + + bool z = om_dssel.zwe; + bool r = om_bsel.wr; + bool g = om_bsel.wg; + bool b = om_bsel.wb; + bool a = om_bsel.wa; + + switch(context->TEST.AFAIL) + { + case 0: z = r = g = b = a = false; break; // none + case 1: z = false; break; // rgba + case 2: r = g = b = a = false; break; // z + case 3: z = a = false; break; // rgb + default: __assume(0); + } + + if(z || r || g || b || a) + { + om_dssel.zwe = z; + om_bsel.wr = r; + om_bsel.wg = g; + om_bsel.wb = b; + om_bsel.wa = a; + + dev->SetupOM(om_dssel, om_bsel, afix); + + dev->DrawPrimitive(); + + if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST) + { + GSDeviceOGL::OMBlendSelector om_bselneg(om_bsel); + GSDeviceOGL::PSSelector ps_selneg(ps_sel); + + om_bselneg.negative = 1; + 
ps_selneg.colclip = 2; + + dev->SetupOM(om_dssel, om_bselneg, afix); + dev->SetupPS(ps_selneg, &ps_cb, ps_ssel); + + dev->DrawPrimitive(); + } + } + } + + dev->EndScene(); + + dev->Recycle(rtcopy); + + if(om_dssel.fba) UpdateFBA(rt); + } +#endif + +void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) +{ + switch(m_vt->m_primclass) + { + case GS_POINT_CLASS: + m_topology = GL_POINTS; + break; + case GS_LINE_CLASS: + case GS_SPRITE_CLASS: + m_topology = GL_LINES; + break; + case GS_TRIANGLE_CLASS: + m_topology = GL_TRIANGLES; + break; + default: + __assume(0); + } + + GSDrawingEnvironment& env = m_env; + GSDrawingContext* context = m_context; + + const GSVector2i& rtsize = rt->GetSize(); + const GSVector2& rtscale = rt->GetScale(); + + bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24; + + //OGL GSTexture* rtcopy = NULL; + + ASSERT(m_dev != NULL); + + GSDeviceOGL* dev = (GSDeviceOGL*)m_dev; + + if(DATE) + { + // Note at the moment OGL has always stencil. 
Rt can be disabled + if(dev->HasStencil()) + { + GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y); + GSVector4 o = GSVector4(-1.0f, 1.0f); + + GSVector4 src = ((m_vt->m_min.p.xyxy(m_vt->m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy()); + GSVector4 dst = src * 2.0f + o.xxxx(); + + GSVertexPT1 vertices[] = + { + {GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)}, + {GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)}, + {GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)}, + {GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)}, + }; + + dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM); + } + else + { + //OGL rtcopy = dev->CreateRenderTarget(rtsize.x, rtsize.y, false, rt->GetFormat()); + + //OGL // I'll use VertexTrace when I consider it more trustworthy + + //OGL dev->CopyRect(rt, rtcopy, GSVector4i(rtsize).zwxy()); + } + } + + // + + dev->BeginScene(); + + // om + + GSDeviceOGL::OMDepthStencilSelector om_dssel; + + if(context->TEST.ZTE) + { + om_dssel.ztst = context->TEST.ZTST; + om_dssel.zwe = !context->ZBUF.ZMSK; + } + else + { + om_dssel.ztst = ZTST_ALWAYS; + } + + if(m_fba) + { + om_dssel.fba = context->FBA.FBA; + } + + GSDeviceOGL::OMBlendSelector om_bsel; + + if(!IsOpaque()) + { + om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt->m_primclass == GS_LINE_CLASS; + + om_bsel.a = context->ALPHA.A; + om_bsel.b = context->ALPHA.B; + om_bsel.c = context->ALPHA.C; + om_bsel.d = context->ALPHA.D; + + if(env.PABE.PABE) + { + if(om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1) + { + // this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader + // cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result + + om_bsel.abe = 0; + } + else + { + //Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though. 
+ //ASSERT(0); + } + } + } + + om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask(); + + // vs + + GSDeviceOGL::VSSelector vs_sel; + + vs_sel.tme = PRIM->TME; + vs_sel.fst = PRIM->FST; + vs_sel.logz = dev->HasDepth32() ? 0 : m_logz ? 1 : 0; + //OGL vs_sel.rtcopy = !!rtcopy; + vs_sel.rtcopy = false; + + // The real GS appears to do no masking based on the Z buffer format and writing larger Z values + // than the buffer supports seems to be an error condition on the real GS, causing it to crash. + // We are probably receiving bad coordinates from VU1 in these cases. + + if(om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe) + { + if(context->ZBUF.PSM == PSM_PSMZ24) + { + if(m_vt->m_max.p.z > 0xffffff) + { + ASSERT(m_vt->m_min.p.z > 0xffffff); + // Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended. + if (m_vt->m_min.p.z > 0xffffff) + { + vs_sel.bppz = 1; + om_dssel.ztst = ZTST_ALWAYS; + } + } + } + else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S) + { + if(m_vt->m_max.p.z > 0xffff) + { + ASSERT(m_vt->m_min.p.z > 0xffff); // sfex capcom logo + // Fixme : Same as above, I guess. + if (m_vt->m_min.p.z > 0xffff) + { + vs_sel.bppz = 2; + om_dssel.ztst = ZTST_ALWAYS; + } + } + } + } + + // FIXME Opengl support half pixel center (as dx10). Code could be easier!!! + GSDeviceOGL::VSConstantBuffer vs_cb; + + float sx = 2.0f * rtscale.x / (rtsize.x << 4); + float sy = 2.0f * rtscale.y / (rtsize.y << 4); + float ox = (float)(int)context->XYOFFSET.OFX; + float oy = (float)(int)context->XYOFFSET.OFY; + float ox2 = 2.0f * m_pixelcenter.x / rtsize.x; + float oy2 = 2.0f * m_pixelcenter.y / rtsize.y; + + //This hack subtracts around half a pixel from OFX and OFY. (Cannot do this directly, + //because DX10 and DX9 have a different pixel center.) 
+ // + //The resulting shifted output aligns better with common blending / corona / blurring effects, + //but introduces a few bad pixels on the edges. + + if(rt->LikelyOffset) + { + // DX9 has pixelcenter set to 0.0, so give it some value here + + if(m_pixelcenter.x == 0 && m_pixelcenter.y == 0) { ox2 = -0.0003f; oy2 = -0.0003f; } + + ox2 *= rt->OffsetHack_modx; + oy2 *= rt->OffsetHack_mody; + } + + vs_cb.VertexScale = GSVector4(sx, -sy, ldexpf(1, -32), 0.0f); + vs_cb.VertexOffset = GSVector4(ox * sx + ox2 + 1, -(oy * sy + oy2 + 1), 0.0f, -1.0f); + // END of FIXME + + // gs + + GSDeviceOGL::GSSelector gs_sel; + + gs_sel.iip = PRIM->IIP; + gs_sel.prim = m_vt->m_primclass; + + // ps + + GSDeviceOGL::PSSelector ps_sel; + GSDeviceOGL::PSSamplerSelector ps_ssel; + GSDeviceOGL::PSConstantBuffer ps_cb; + + if(DATE) + { + if(dev->HasStencil()) + { + om_dssel.date = 1; + } + else + { + ps_sel.date = 1 + context->TEST.DATM; + } + } + + if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST) + { + ps_sel.colclip = 1; + } + + ps_sel.clr1 = om_bsel.IsCLR1(); + ps_sel.fba = context->FBA.FBA; + ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 
1 : 0; + + if(UserHacks_AlphaHack) ps_sel.aout = 1; + + if(PRIM->FGE) + { + ps_sel.fog = 1; + + ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255; + } + + if(context->TEST.ATE) + { + ps_sel.atst = context->TEST.ATST; + + switch(ps_sel.atst) + { + case ATST_LESS: + ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1); + break; + case ATST_GREATER: + ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1); + break; + default: + ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF; + break; + } + } + else + { + ps_sel.atst = ATST_ALWAYS; + } + + if(tex) + { + ps_sel.wms = context->CLAMP.WMS; + ps_sel.wmt = context->CLAMP.WMT; + ps_sel.fmt = tex->m_fmt; + ps_sel.aem = env.TEXA.AEM; + ps_sel.tfx = context->TEX0.TFX; + ps_sel.tcc = context->TEX0.TCC; + ps_sel.ltf = m_filter == 2 ? m_vt->IsLinear() : m_filter; + ps_sel.rt = tex->m_target; + + int w = tex->m_texture->GetWidth(); + int h = tex->m_texture->GetHeight(); + + int tw = (int)(1 << context->TEX0.TW); + int th = (int)(1 << context->TEX0.TH); + + GSVector4 WH(tw, th, w, h); + + if(PRIM->FST) + { + vs_cb.TextureScale = GSVector4(1.0f / 16) / WH.xyxy(); + //Maybe better? 
+ //vs_cb.TextureScale = GSVector4(1.0f / 16) * GSVector4(tex->m_texture->GetScale()).xyxy() / WH.zwzw(); + ps_sel.fst = 1; + } + + ps_cb.WH = WH; + ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw(); + ps_cb.MskFix = GSVector4i(context->CLAMP.MINU, context->CLAMP.MINV, context->CLAMP.MAXU, context->CLAMP.MAXV); + + GSVector4 clamp(ps_cb.MskFix); + GSVector4 ta(env.TEXA & GSVector4i::x000000ff()); + + ps_cb.MinMax = clamp / WH.xyxy(); + ps_cb.MinF_TA = (clamp + 0.5f).xyxy(ta) / WH.xyxy(GSVector4(255, 255)); + + ps_ssel.tau = (context->CLAMP.WMS + 3) >> 1; + ps_ssel.tav = (context->CLAMP.WMT + 3) >> 1; + ps_ssel.ltf = ps_sel.ltf; + } + else + { + ps_sel.tfx = 4; + } + + // rs + + GSVector4i scissor = GSVector4i(GSVector4(rtscale).xyxy() * context->scissor.in).rintersect(GSVector4i(rtsize).zwxy()); + + dev->OMSetRenderTargets(rt, ds, &scissor); + dev->PSSetShaderResource(0, tex ? tex->m_texture : NULL); + dev->PSSetShaderResource(1, tex ? tex->m_palette : NULL); + //OGL dev->PSSetShaderResource(2, rtcopy); + + uint8 afix = context->ALPHA.FIX; + + dev->SetupOM(om_dssel, om_bsel, afix); + dev->SetupIA(m_vertex.buff, m_vertex.next, m_index.buff, m_index.tail, m_topology); + dev->SetupVS(vs_sel, &vs_cb); + dev->SetupGS(gs_sel); + dev->SetupPS(ps_sel, &ps_cb, ps_ssel); + + // draw + + if(context->TEST.DoFirstPass()) + { + dev->DrawIndexedPrimitive(); + + if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST) + { + GSDeviceOGL::OMBlendSelector om_bselneg(om_bsel); + GSDeviceOGL::PSSelector ps_selneg(ps_sel); + + om_bselneg.negative = 1; + ps_selneg.colclip = 2; + + dev->SetupOM(om_dssel, om_bselneg, afix); + dev->SetupPS(ps_selneg, &ps_cb, ps_ssel); + + dev->DrawIndexedPrimitive(); + } + } + + if(context->TEST.DoSecondPass()) + { + ASSERT(!env.PABE.PABE); + + static const uint32 iatst[] = {1, 0, 5, 6, 7, 2, 3, 4}; + + ps_sel.atst = iatst[ps_sel.atst]; + + switch(ps_sel.atst) + { + case ATST_LESS: + ps_cb.FogColor_AREF.a = 
(float)((int)context->TEST.AREF - 1); + break; + case ATST_GREATER: + ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1); + break; + default: + ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF; + break; + } + + dev->SetupPS(ps_sel, &ps_cb, ps_ssel); + + bool z = om_dssel.zwe; + bool r = om_bsel.wr; + bool g = om_bsel.wg; + bool b = om_bsel.wb; + bool a = om_bsel.wa; + + switch(context->TEST.AFAIL) + { + case 0: z = r = g = b = a = false; break; // none + case 1: z = false; break; // rgba + case 2: r = g = b = a = false; break; // z + case 3: z = a = false; break; // rgb + default: __assume(0); + } + + if(z || r || g || b || a) + { + om_dssel.zwe = z; + om_bsel.wr = r; + om_bsel.wg = g; + om_bsel.wb = b; + om_bsel.wa = a; + + dev->SetupOM(om_dssel, om_bsel, afix); + + dev->DrawIndexedPrimitive(); + + if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST) + { + GSDeviceOGL::OMBlendSelector om_bselneg(om_bsel); + GSDeviceOGL::PSSelector ps_selneg(ps_sel); + + om_bselneg.negative = 1; + ps_selneg.colclip = 2; + + dev->SetupOM(om_dssel, om_bselneg, afix); + dev->SetupPS(ps_selneg, &ps_cb, ps_ssel); + + dev->DrawIndexedPrimitive(); + } + } + } + + dev->EndScene(); + + //OGL dev->Recycle(rtcopy); + + if(om_dssel.fba) UpdateFBA(rt); +} diff --git a/plugins/GSdx/GSRendererOGL.h b/plugins/GSdx/GSRendererOGL.h index 32109a6f5e..62736d5c04 100644 --- a/plugins/GSdx/GSRendererOGL.h +++ b/plugins/GSdx/GSRendererOGL.h @@ -30,7 +30,7 @@ // FIXME does it need a GSVertexHWOGL ??? 
Data order can be easily programmed on opengl (the only potential // issue is the unsupported praga push/pop // Note it impact GSVertexTrace.cpp => void GSVertexTrace::Update(const GSVertexHWOGL* v, int count, GS_PRIM_CLASS primclass) -class GSRendererOGL : public GSRendererHW +class GSRendererOGL : public GSRendererHW //class GSRendererOGL : public GSRendererHW { private: @@ -42,6 +42,14 @@ class GSRendererOGL : public GSRendererHW protected: GLenum m_topology; + template + void ConvertVertex(size_t dst_index, size_t src_index); + + int GetPosX(const void* vertex) const {return (int)((const GSVertexHW11*)vertex)->p.x;} + int GetPosY(const void* vertex) const {return (int)((const GSVertexHW11*)vertex)->p.y;} + uint32 GetColor(const void* vertex) const {return ((const GSVertexHW11*)vertex)->c0;} + void SetColor(void* vertex, uint32 c) const {((GSVertexHW11*)vertex)->c0 = c;} + public: GSRendererOGL(); virtual ~GSRendererOGL() {}; @@ -52,418 +60,5 @@ class GSRendererOGL : public GSRendererHW void UpdateFBA(GSTexture* rt) {} - void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) - { - - switch(m_vt.m_primclass) - { - case GS_POINT_CLASS: - m_topology = GL_POINTS; - m_perfmon.Put(GSPerfMon::Prim, m_count); - break; - case GS_LINE_CLASS: - case GS_SPRITE_CLASS: - m_topology = GL_LINES; - m_perfmon.Put(GSPerfMon::Prim, m_count / 2); - break; - case GS_TRIANGLE_CLASS: - m_topology = GL_TRIANGLES; - m_perfmon.Put(GSPerfMon::Prim, m_count / 3); - break; - default: - __assume(0); - } - - - GSDrawingEnvironment& env = m_env; - GSDrawingContext* context = m_context; - - const GSVector2i& rtsize = rt->GetSize(); - const GSVector2& rtscale = rt->GetScale(); - - bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24; - - GSTexture *rtcopy = NULL; - - ASSERT(m_dev != NULL); - - GSDeviceOGL* dev = (GSDeviceOGL*)m_dev; - - if(DATE) - { - if(dev->HasStencil()) - { - GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y); - GSVector4 o = 
GSVector4(-1.0f, 1.0f); - - GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy()); - GSVector4 dst = src * 2.0f + o.xxxx(); - - GSVertexPT1 vertices[] = - { - {GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)}, - {GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)}, - {GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)}, - {GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)}, - }; - - dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM); - } - else - { - rtcopy = dev->CreateRenderTarget(rtsize.x, rtsize.y, false, rt->GetFormat()); - - // I'll use VertexTrace when I consider it more trustworthy - - dev->CopyRect(rt, rtcopy, GSVector4i(rtsize).zwxy()); - } - } - - // - - dev->BeginScene(); - - // om - - GSDeviceOGL::OMDepthStencilSelector om_dssel; - - if(context->TEST.ZTE) - { - om_dssel.ztst = context->TEST.ZTST; - om_dssel.zwe = !context->ZBUF.ZMSK; - } - else - { - om_dssel.ztst = ZTST_ALWAYS; - } - - if(m_fba) - { - om_dssel.fba = context->FBA.FBA; - } - - GSDeviceOGL::OMBlendSelector om_bsel; - - if(!IsOpaque()) - { - om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS; - - om_bsel.a = context->ALPHA.A; - om_bsel.b = context->ALPHA.B; - om_bsel.c = context->ALPHA.C; - om_bsel.d = context->ALPHA.D; - - if(env.PABE.PABE) - { - if(om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1) - { - // this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader - // cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result - - om_bsel.abe = 0; - } - else - { - //Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though. 
- //ASSERT(0); - } - } - } - - om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask(); - - // vs - - GSDeviceOGL::VSSelector vs_sel; - - vs_sel.tme = PRIM->TME; - vs_sel.fst = PRIM->FST; - vs_sel.logz = dev->HasDepth32() ? 0 : m_logz ? 1 : 0; - vs_sel.rtcopy = !!rtcopy; - - // The real GS appears to do no masking based on the Z buffer format and writing larger Z values - // than the buffer supports seems to be an error condition on the real GS, causing it to crash. - // We are probably receiving bad coordinates from VU1 in these cases. - - if(om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe) - { - if(context->ZBUF.PSM == PSM_PSMZ24) - { - if(m_vt.m_max.p.z > 0xffffff) - { - ASSERT(m_vt.m_min.p.z > 0xffffff); - // Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended. - if (m_vt.m_min.p.z > 0xffffff) - { - vs_sel.bppz = 1; - om_dssel.ztst = ZTST_ALWAYS; - } - } - } - else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S) - { - if(m_vt.m_max.p.z > 0xffff) - { - ASSERT(m_vt.m_min.p.z > 0xffff); // sfex capcom logo - // Fixme : Same as above, I guess. - if (m_vt.m_min.p.z > 0xffff) - { - vs_sel.bppz = 2; - om_dssel.ztst = ZTST_ALWAYS; - } - } - } - } - - // FIXME Opengl support half pixel center (as dx10). Code could be easier!!! - GSDeviceOGL::VSConstantBuffer vs_cb; - - float sx = 2.0f * rtscale.x / (rtsize.x << 4); - float sy = 2.0f * rtscale.y / (rtsize.y << 4); - float ox = (float)(int)context->XYOFFSET.OFX; - float oy = (float)(int)context->XYOFFSET.OFY; - float ox2 = 2.0f * m_pixelcenter.x / rtsize.x; - float oy2 = 2.0f * m_pixelcenter.y / rtsize.y; - - //This hack subtracts around half a pixel from OFX and OFY. (Cannot do this directly, - //because DX10 and DX9 have a different pixel center.) - // - //The resulting shifted output aligns better with common blending / corona / blurring effects, - //but introduces a few bad pixels on the edges. 
- - if(rt->LikelyOffset) - { - // DX9 has pixelcenter set to 0.0, so give it some value here - - if(m_pixelcenter.x == 0 && m_pixelcenter.y == 0) { ox2 = -0.0003f; oy2 = -0.0003f; } - - ox2 *= rt->OffsetHack_modx; - oy2 *= rt->OffsetHack_mody; - } - - vs_cb.VertexScale = GSVector4(sx, -sy, ldexpf(1, -32), 0.0f); - vs_cb.VertexOffset = GSVector4(ox * sx + ox2 + 1, -(oy * sy + oy2 + 1), 0.0f, -1.0f); - // END of FIXME - - // gs - - GSDeviceOGL::GSSelector gs_sel; - - gs_sel.iip = PRIM->IIP; - gs_sel.prim = m_vt.m_primclass; - - // ps - - GSDeviceOGL::PSSelector ps_sel; - GSDeviceOGL::PSSamplerSelector ps_ssel; - GSDeviceOGL::PSConstantBuffer ps_cb; - - if(DATE) - { - if(dev->HasStencil()) - { - om_dssel.date = 1; - } - else - { - ps_sel.date = 1 + context->TEST.DATM; - } - } - - if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST) - { - ps_sel.colclip = 1; - } - - ps_sel.clr1 = om_bsel.IsCLR1(); - ps_sel.fba = context->FBA.FBA; - ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0; - - if(UserHacks_AlphaHack) ps_sel.aout = 1; - - if(PRIM->FGE) - { - ps_sel.fog = 1; - - ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255; - } - - if(context->TEST.ATE) - { - ps_sel.atst = context->TEST.ATST; - - switch(ps_sel.atst) - { - case ATST_LESS: - ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1); - break; - case ATST_GREATER: - ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1); - break; - default: - ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF; - break; - } - } - else - { - ps_sel.atst = ATST_ALWAYS; - } - - if(tex) - { - ps_sel.wms = context->CLAMP.WMS; - ps_sel.wmt = context->CLAMP.WMT; - ps_sel.fmt = tex->m_fmt; - ps_sel.aem = env.TEXA.AEM; - ps_sel.tfx = context->TEX0.TFX; - ps_sel.tcc = context->TEX0.TCC; - ps_sel.ltf = m_filter == 2 ? 
m_vt.IsLinear() : m_filter; - ps_sel.rt = tex->m_target; - - int w = tex->m_texture->GetWidth(); - int h = tex->m_texture->GetHeight(); - - int tw = (int)(1 << context->TEX0.TW); - int th = (int)(1 << context->TEX0.TH); - - GSVector4 WH(tw, th, w, h); - - if(PRIM->FST) - { - vs_cb.TextureScale = GSVector4(1.0f / 16) / WH.xyxy(); - //Maybe better? - //vs_cb.TextureScale = GSVector4(1.0f / 16) * GSVector4(tex->m_texture->GetScale()).xyxy() / WH.zwzw(); - ps_sel.fst = 1; - } - - ps_cb.WH = WH; - ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw(); - ps_cb.MskFix = GSVector4i(context->CLAMP.MINU, context->CLAMP.MINV, context->CLAMP.MAXU, context->CLAMP.MAXV); - - GSVector4 clamp(ps_cb.MskFix); - GSVector4 ta(env.TEXA & GSVector4i::x000000ff()); - - ps_cb.MinMax = clamp / WH.xyxy(); - ps_cb.MinF_TA = (clamp + 0.5f).xyxy(ta) / WH.xyxy(GSVector4(255, 255)); - - ps_ssel.tau = (context->CLAMP.WMS + 3) >> 1; - ps_ssel.tav = (context->CLAMP.WMT + 3) >> 1; - ps_ssel.ltf = ps_sel.ltf; - } - else - { - ps_sel.tfx = 4; - } - - // rs - - GSVector4i scissor = GSVector4i(GSVector4(rtscale).xyxy() * context->scissor.in).rintersect(GSVector4i(rtsize).zwxy()); - - dev->OMSetRenderTargets(rt, ds, &scissor); - dev->PSSetShaderResource(0, tex ? tex->m_texture : 0); - dev->PSSetShaderResource(1, tex ? 
tex->m_palette : 0); - dev->PSSetShaderResource(2, rtcopy); - - uint8 afix = context->ALPHA.FIX; - - dev->SetupOM(om_dssel, om_bsel, afix); - dev->SetupIA(m_vertices, m_count, m_topology); - dev->SetupVS(vs_sel, &vs_cb); - dev->SetupGS(gs_sel); - dev->SetupPS(ps_sel, &ps_cb, ps_ssel); - - // draw - - if(context->TEST.DoFirstPass()) - { - dev->DrawPrimitive(); - - if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST) - { - GSDeviceOGL::OMBlendSelector om_bselneg(om_bsel); - GSDeviceOGL::PSSelector ps_selneg(ps_sel); - - om_bselneg.negative = 1; - ps_selneg.colclip = 2; - - dev->SetupOM(om_dssel, om_bselneg, afix); - dev->SetupPS(ps_selneg, &ps_cb, ps_ssel); - - dev->DrawPrimitive(); - } - } - - if(context->TEST.DoSecondPass()) - { - ASSERT(!env.PABE.PABE); - - static const uint32 iatst[] = {1, 0, 5, 6, 7, 2, 3, 4}; - - ps_sel.atst = iatst[ps_sel.atst]; - - switch(ps_sel.atst) - { - case ATST_LESS: - ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1); - break; - case ATST_GREATER: - ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1); - break; - default: - ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF; - break; - } - - dev->SetupPS(ps_sel, &ps_cb, ps_ssel); - - bool z = om_dssel.zwe; - bool r = om_bsel.wr; - bool g = om_bsel.wg; - bool b = om_bsel.wb; - bool a = om_bsel.wa; - - switch(context->TEST.AFAIL) - { - case 0: z = r = g = b = a = false; break; // none - case 1: z = false; break; // rgba - case 2: r = g = b = a = false; break; // z - case 3: z = a = false; break; // rgb - default: __assume(0); - } - - if(z || r || g || b || a) - { - om_dssel.zwe = z; - om_bsel.wr = r; - om_bsel.wg = g; - om_bsel.wb = b; - om_bsel.wa = a; - - dev->SetupOM(om_dssel, om_bsel, afix); - - dev->DrawPrimitive(); - - if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST) - { - GSDeviceOGL::OMBlendSelector om_bselneg(om_bsel); - GSDeviceOGL::PSSelector ps_selneg(ps_sel); - - om_bselneg.negative = 1; - 
ps_selneg.colclip = 2; - - dev->SetupOM(om_dssel, om_bselneg, afix); - dev->SetupPS(ps_selneg, &ps_cb, ps_ssel); - - dev->DrawPrimitive(); - } - } - } - - dev->EndScene(); - - dev->Recycle(rtcopy); - - if(om_dssel.fba) UpdateFBA(rt); - } + void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex); }; diff --git a/plugins/GSdx/GSTextureFXOGL.cpp b/plugins/GSdx/GSTextureFXOGL.cpp index 0e61c2b7bc..5616a72078 100644 --- a/plugins/GSdx/GSTextureFXOGL.cpp +++ b/plugins/GSdx/GSTextureFXOGL.cpp @@ -75,10 +75,11 @@ void GSDeviceOGL::CreateTextureFX() m_vb = new GSVertexBufferStateOGL(sizeof(GSVertexHW11), vert_format, countof(vert_format)); } -void GSDeviceOGL::SetupIA(const void* vertices, int count, GLenum prim) +void GSDeviceOGL::SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim) { IASetVertexState(m_vb); - IASetVertexBuffer(vertices, count); + IASetVertexBuffer(vertex, vertex_count); + IASetIndexBuffer(index, index_count); IASetPrimitiveTopology(prim); } diff --git a/plugins/GSdx/linux_replay.cpp b/plugins/GSdx/linux_replay.cpp index f2d25ab8c9..0de8a06825 100644 --- a/plugins/GSdx/linux_replay.cpp +++ b/plugins/GSdx/linux_replay.cpp @@ -37,5 +37,5 @@ int main ( int argc, char *argv[] ) if ( argc != 3 ) help(); GSsetSettingsDir(argv[1]); - GSReplay(argv[2], 12); + GSReplay(argv[2], 15); }