/* * Copyright (C) 2007-2009 Gabest * http://www.gabest.org * * This Program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * * This Program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with GNU Make; see the file COPYING. If not, write to * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. * http://www.gnu.org/copyleft/gpl.html * */ #pragma once #include "GSRenderer.h" #include "GSTextureCacheSW.h" #include "GSDrawScanline.h" extern const GSVector4 g_pos_scale; template class GSRendererSW : public GSRendererT { protected: GSRasterizerList m_rl; GSTextureCacheSW* m_tc; GSVertexTrace m_vtrace; Texture m_texture[2]; bool m_reset; void Reset() { // TODO: GSreset can come from the main thread too => crash // m_tc->RemoveAll(); m_reset = true; __super::Reset(); } void VSync(int field) { __super::VSync(field); m_tc->IncAge(); if(m_reset) { m_tc->RemoveAll(); m_reset = false; } // if((m_perfmon.GetFrame() & 255) == 0) m_rl.PrintStats(); } void ResetDevice() { m_texture[0] = Texture(); m_texture[1] = Texture(); } bool GetOutput(int i, Texture& t) { CRect r(0, 0, DISPFB[i]->FBW * 64, GetFrameRect(i).bottom); // TODO: round up bottom if(m_texture[i].GetWidth() != r.Width() || m_texture[i].GetHeight() != r.Height()) { m_texture[i] = Texture(); } if(!m_texture[i] && !m_dev.CreateTexture(m_texture[i], r.Width(), r.Height())) { return false; } GIFRegTEX0 TEX0; TEX0.TBP0 = DISPFB[i]->Block(); TEX0.TBW = DISPFB[i]->FBW; TEX0.PSM = DISPFB[i]->PSM; GIFRegCLAMP CLAMP; CLAMP.WMS = CLAMP.WMT = 1; // TODO static BYTE* buff = (BYTE*)_aligned_malloc(1024 * 1024 * 4, 16); static int pitch = 1024 * 4; m_mem.ReadTexture(r, buff, pitch, TEX0, m_env.TEXA, CLAMP); m_texture[i].Update(r, buff, pitch); t = m_texture[i]; if(s_dump) { CString str; str.Format(_T("c:\\temp1\\_%05d_f%I64d_fr%d_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM); if(s_save) t.Save(str); } return true; } bool TryAlphaTest(DWORD& fm, DWORD& zm) { const GSDrawingEnvironment& env = m_env; const GSDrawingContext* context = m_context; bool pass = true; if(context->TEST.ATST == ATST_NEVER) { pass = false; } else if(context->TEST.ATST != ATST_ALWAYS) { GSVector4i af = GSVector4i(m_vtrace.m_min.c.wwww(m_vtrace.m_max.c)) >> 7; int amin, amax; if(PRIM->TME && (context->TEX0.TCC || context->TEX0.TFX == TFX_DECAL)) { DWORD bpp = GSLocalMemory::m_psm[context->TEX0.PSM].trbpp; DWORD cbpp = GSLocalMemory::m_psm[context->TEX0.CPSM].trbpp; DWORD pal = GSLocalMemory::m_psm[context->TEX0.PSM].pal; if(bpp == 32) { return false; } else if(bpp == 24) { amin = env.TEXA.AEM ? 0 : env.TEXA.TA0; amax = env.TEXA.TA0; } else if(bpp == 16) { amin = env.TEXA.AEM ? 0 : min(env.TEXA.TA0, env.TEXA.TA1); amax = max(env.TEXA.TA0, env.TEXA.TA1); } else { m_mem.m_clut.GetAlphaMinMax32(amin, amax); } switch(context->TEX0.TFX) { case TFX_MODULATE: amin = (amin * af.x) >> 7; amax = (amax * af.z) >> 7; if(amin > 255) amin = 255; if(amax > 255) amax = 255; break; case TFX_DECAL: break; case TFX_HIGHLIGHT: amin = amin + af.x; amax = amax + af.z; if(amin > 255) amin = 255; if(amax > 255) amax = 255; break; case TFX_HIGHLIGHT2: break; default: __assume(0); } } else { amin = af.x; amax = af.z; } int aref = context->TEST.AREF; switch(context->TEST.ATST) { case ATST_NEVER: pass = false; break; case ATST_ALWAYS: pass = true; break; case ATST_LESS: if(amax < aref) pass = true; else if(amin >= aref) pass = false; else return false; break; case ATST_LEQUAL: if(amax <= aref) pass = true; else if(amin > aref) pass = false; else return false; break; case ATST_EQUAL: if(amin == aref && amax == aref) pass = true; else if(amin > aref || amax < aref) pass = false; else return false; break; case ATST_GEQUAL: if(amin >= aref) pass = true; else if(amax < aref) pass = false; else return false; break; case ATST_GREATER: if(amin > aref) pass = true; else if(amax <= aref) pass = false; else return false; break; case ATST_NOTEQUAL: if(amin == aref && amax == aref) pass = false; else if(amin > aref || amax < aref) pass = true; else return false; break; default: __assume(0); } } if(!pass) { switch(context->TEST.AFAIL) { case AFAIL_KEEP: fm = zm = 0xffffffff; break; case AFAIL_FB_ONLY: zm = 0xffffffff; break; case AFAIL_ZB_ONLY: fm = 0xffffffff; break; case AFAIL_RGB_ONLY: fm |= 0xff000000; zm = 0xffffffff; break; default: __assume(0); } } return true; } void GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass) { const GSDrawingEnvironment& env = m_env; const GSDrawingContext* context = m_context; p.vm = m_mem.m_vm32; p.fbo = m_mem.GetOffset(context->FRAME.Block(), context->FRAME.FBW, context->FRAME.PSM); p.zbo = m_mem.GetOffset(context->ZBUF.Block(), context->FRAME.FBW, context->ZBUF.PSM); p.fzbo = m_mem.GetOffset4(context->FRAME, context->ZBUF); p.sel.key = 0; p.sel.fpsm = 3; p.sel.zpsm = 3; p.sel.atst = ATST_ALWAYS; p.sel.tfx = TFX_NONE; p.sel.ababcd = 255; p.sel.sprite = primclass == GS_SPRITE_CLASS ? 1 : 0; p.fm = context->FRAME.FBMSK; p.zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0; if(context->TEST.ZTE && context->TEST.ZTST == ZTST_NEVER) { p.fm = 0xffffffff; p.zm = 0xffffffff; } if(PRIM->TME) { m_mem.m_clut.Read32(context->TEX0, env.TEXA); } if(context->TEST.ATE) { if(!TryAlphaTest(p.fm, p.zm)) { p.sel.atst = context->TEST.ATST; p.sel.afail = context->TEST.AFAIL; } } bool fwrite = p.fm != 0xffffffff; bool ftest = p.sel.atst != ATST_ALWAYS || context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24; p.sel.fwrite = fwrite; p.sel.ftest = ftest; if(fwrite || ftest) { p.sel.fpsm = GSUtil::EncodePSM(context->FRAME.PSM); if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vtrace.m_eq.rgba != 15) { p.sel.iip = PRIM->IIP; } if(PRIM->TME) { p.sel.tfx = context->TEX0.TFX; p.sel.tcc = context->TEX0.TCC; p.sel.fst = PRIM->FST; p.sel.ltf = context->TEX1.IsLinear(); p.sel.tlu = GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0; p.sel.wms = context->CLAMP.WMS; p.sel.wmt = context->CLAMP.WMT; if(p.sel.iip == 0 && p.sel.tfx == TFX_MODULATE && p.sel.tcc) { if(m_vtrace.m_eq.rgba == 15 && (m_vtrace.m_min.c == GSVector4(128.0f * 128.0f)).alltrue()) { // modulate does not do anything when vertex color is 0x80 p.sel.tfx = TFX_DECAL; } } if(p.sel.tfx == TFX_DECAL) { p.sel.tcc = 1; } if(p.sel.fst == 0) { // skip per pixel division if q is constant GSVertexSW* v = m_vertices; if(m_vtrace.m_eq.q) { p.sel.fst = 1; if(v[0].t.z != 1.0f) { GSVector4 w = v[0].t.zzzz().rcpnr(); for(int i = 0, j = m_count; i < j; i++) { v[i].t *= w; } m_vtrace.m_min.t *= w; m_vtrace.m_max.t *= w; } } else if(primclass == GS_SPRITE_CLASS) { p.sel.fst = 1; GSVector4 tmin = GSVector4(FLT_MAX); GSVector4 tmax = GSVector4(-FLT_MAX); for(int i = 0, j = m_count; i < j; i += 2) { GSVector4 w = v[i + 1].t.zzzz().rcpnr(); GSVector4 v0 = v[i + 0].t * w; GSVector4 v1 = v[i + 1].t * w; v[i + 0].t = v0; v[i + 1].t = v1; tmin = tmin.minv(v0).minv(v1); tmax = tmax.maxv(v0).maxv(v1); } m_vtrace.m_max.t = tmax; m_vtrace.m_min.t = tmin; } } if(p.sel.fst) { // if q is constant we can do the half pel shift for bilinear sampling on the vertices if(p.sel.ltf) { GSVector4 half(0x8000, 0x8000); GSVertexSW* v = m_vertices; for(int i = 0, j = m_count; i < j; i++) { v[i].t -= half; } m_vtrace.m_min.t -= half; m_vtrace.m_max.t += half; } } /* else { GSVector4 tmin = GSVector4(FLT_MAX); GSVector4 tmax = GSVector4(-FLT_MAX); GSVertexSW* v = m_vertices; for(int i = 0, j = m_count; i < j; i++) { GSVector4 v0 = v[i].t * v[i].t.zzzz().rcpnr(); tmin = tmin.minv(v0); tmax = tmax.maxv(v0); } if(p.sel.ltf) { GSVector4 half(0x8000, 0x8000); tmin -= half; tmax += half; } m_vtrace.min.t = tmin; m_vtrace.max.t = tmax; } */ CRect r; int w = 1 << context->TEX0.TW; int h = 1 << context->TEX0.TH; MinMaxUV(w, h, r, p.sel.fst); const GSTextureCacheSW::GSTexture* t = m_tc->Lookup(context->TEX0, env.TEXA, &r); if(!t) {ASSERT(0); return;} p.tex = t->m_buff; p.clut = m_mem.m_clut; p.tw = t->m_tw; } p.sel.fge = PRIM->FGE; if(context->FRAME.PSM != PSM_PSMCT24) { p.sel.date = context->TEST.DATE; p.sel.datm = context->TEST.DATM; } if(PRIM->ABE && !context->ALPHA.IsOpaque() || PRIM->AA1) { p.sel.abe = PRIM->ABE; p.sel.ababcd = context->ALPHA.ai32[0]; if(env.PABE.PABE) { p.sel.pabe = 1; } if(PRIM->AA1 && (primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS)) { p.sel.aa1 = m_aa1 ? 1 : 0; } } if(p.sel.date || p.sel.aba == 1 || p.sel.abb == 1 || p.sel.abc == 1 || p.sel.abd == 1 || p.sel.atst != ATST_ALWAYS && p.sel.afail == AFAIL_RGB_ONLY || p.sel.fpsm == 0 && p.fm != 0 && p.fm != 0xffffffff || p.sel.fpsm == 1 && (p.fm & 0x00ffffff) != 0 && (p.fm & 0x00ffffff) != 0x00ffffff || p.sel.fpsm == 2 && (p.fm & 0x80f8f8f8) != 0 && (p.fm & 0x80f8f8f8) != 0x80f8f8f8) { p.sel.rfb = 1; } p.sel.colclamp = env.COLCLAMP.CLAMP; p.sel.fba = context->FBA.FBA; p.sel.dthe = env.DTHE.DTHE; } bool zwrite = p.zm != 0xffffffff; bool ztest = context->TEST.ZTE && context->TEST.ZTST > 1; p.sel.zwrite = zwrite; p.sel.ztest = ztest; if(zwrite || ztest) { p.sel.zpsm = GSUtil::EncodePSM(context->ZBUF.PSM); p.sel.ztst = ztest ? context->TEST.ZTST : 1; p.sel.zoverflow = GSVector4i(m_vtrace.m_max.p).z == 0x80000000; } } void Draw() { GS_PRIM_CLASS primclass = GSUtil::GetPrimClass(PRIM->PRIM); m_vtrace.Update(m_vertices, m_count, primclass, PRIM->IIP, PRIM->TME, m_context->TEX0.TFX); GSScanlineParam p; GetScanlineParam(p, primclass); if((p.fm & p.zm) == 0xffffffff) { return; } if(s_dump) { CString str; str.Format(_T("c:\\temp1\\_%05d_f%I64d_tex_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), (int)m_context->TEX0.TBP0, (int)m_context->TEX0.PSM); if(PRIM->TME) if(s_save) {m_mem.SaveBMP(str, m_context->TEX0.TBP0, m_context->TEX0.TBW, m_context->TEX0.PSM, 1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH);} str.Format(_T("c:\\temp1\\_%05d_f%I64d_rt0_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM); if(s_save) {m_mem.SaveBMP(str, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameSize(1).cx, 512);}//GetFrameSize(1).cy); str.Format(_T("c:\\temp1\\_%05d_f%I64d_rz0_%05x_%d.bmp"), s_n-1, m_perfmon.GetFrame(), m_context->ZBUF.Block(), m_context->ZBUF.PSM); if(s_savez) {m_mem.SaveBMP(str, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameSize(1).cx, 512);} } GSRasterizerData data; data.scissor = GSVector4i(m_context->scissor.in); data.scissor.z = min(data.scissor.z, (int)m_context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour data.primclass = primclass; data.vertices = m_vertices; data.count = m_count; data.param = &p; m_rl.Draw(&data); GSRasterizerStats stats; m_rl.GetStats(stats); m_perfmon.Put(GSPerfMon::Draw, 1); m_perfmon.Put(GSPerfMon::Prim, stats.prims); m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels); GSVector4i pos(m_vtrace.m_min.p.xyxy(m_vtrace.m_max.p)); GSVector4i scissor = data.scissor; CRect r; r.left = max(scissor.x, min(scissor.z, pos.x)); r.top = max(scissor.y, min(scissor.w, pos.y)); r.right = max(scissor.x, min(scissor.z, pos.z)); r.bottom = max(scissor.y, min(scissor.w, pos.w)); GIFRegBITBLTBUF BITBLTBUF; BITBLTBUF.DBW = m_context->FRAME.FBW; if(p.fm != 0xffffffff) { BITBLTBUF.DBP = m_context->FRAME.Block(); BITBLTBUF.DPSM = m_context->FRAME.PSM; m_tc->InvalidateVideoMem(BITBLTBUF, r); } if(p.zm != 0xffffffff) { BITBLTBUF.DBP = m_context->ZBUF.Block(); BITBLTBUF.DPSM = m_context->ZBUF.PSM; m_tc->InvalidateVideoMem(BITBLTBUF, r); } if(s_dump) { CString str; str.Format(_T("c:\\temp1\\_%05d_f%I64d_rt1_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM); if(s_save) {m_mem.SaveBMP(str, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameSize(1).cx, 512);}//GetFrameSize(1).cy); str.Format(_T("c:\\temp1\\_%05d_f%I64d_rz1_%05x_%d.bmp"), s_n-1, m_perfmon.GetFrame(), m_context->ZBUF.Block(), m_context->ZBUF.PSM); if(s_savez) {m_mem.SaveBMP(str, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameSize(1).cx, 512);} } if(0)//stats.ticks > 1000000) { printf("* [%I64d | %012I64x] ticks %I64d prims %d (%d) pixels %d (%d)\n", m_perfmon.GetFrame(), p.sel.key, stats.ticks, stats.prims, stats.prims > 0 ? (int)(stats.ticks / stats.prims) : -1, stats.pixels, stats.pixels > 0 ? (int)(stats.ticks / stats.pixels) : -1); } } void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, CRect r) { m_tc->InvalidateVideoMem(BITBLTBUF, r); } void MinMaxUV(int w, int h, CRect& r, DWORD fst) { const GSDrawingContext* context = m_context; int wms = context->CLAMP.WMS; int wmt = context->CLAMP.WMT; int minu = (int)context->CLAMP.MINU; int minv = (int)context->CLAMP.MINV; int maxu = (int)context->CLAMP.MAXU; int maxv = (int)context->CLAMP.MAXV; GSVector4i vr(0, 0, w, h); switch(wms) { case CLAMP_REPEAT: break; case CLAMP_CLAMP: break; case CLAMP_REGION_CLAMP: if(vr.x < minu) vr.x = minu; if(vr.z > maxu + 1) vr.z = maxu + 1; break; case CLAMP_REGION_REPEAT: vr.x = maxu; vr.z = vr.x + (minu + 1); break; default: __assume(0); } switch(wmt) { case CLAMP_REPEAT: break; case CLAMP_CLAMP: break; case CLAMP_REGION_CLAMP: if(vr.y < minv) vr.y = minv; if(vr.w > maxv + 1) vr.w = maxv + 1; break; case CLAMP_REGION_REPEAT: vr.y = maxv; vr.w = vr.y + (minv + 1); break; default: __assume(0); } if(fst) { GSVector4i uv = GSVector4i(m_vtrace.m_min.t.xyxy(m_vtrace.m_max.t)).sra32(16); /* int tw = context->TEX0.TW; int th = context->TEX0.TH; GSVector4i u = uv & GSVector4i::xffffffff().srl32(32 - tw); GSVector4i v = uv & GSVector4i::xffffffff().srl32(32 - th); GSVector4i uu = uv.sra32(tw); GSVector4i vv = uv.sra32(th); int mask = (uu.upl32(vv) == uu.uph32(vv)).mask(); */ switch(wms) { case CLAMP_REPEAT: /* if(mask & 0x000f) { if(vr.x < u.x) vr.x = u.x; if(vr.z > u.z + 1) vr.z = u.z + 1; } */ break; case CLAMP_CLAMP: case CLAMP_REGION_CLAMP: if(vr.x < uv.x) vr.x = uv.x; if(vr.z > uv.z + 1) vr.z = uv.z + 1; break; case CLAMP_REGION_REPEAT: // TODO break; default: __assume(0); } switch(wmt) { case CLAMP_REPEAT: /* if(mask & 0xf000) { if(vr.y < v.y) vr.y = v.y; if(vr.w > v.w + 1) vr.w = v.w + 1; } */ break; case CLAMP_CLAMP: case CLAMP_REGION_CLAMP: if(vr.y < uv.y) vr.y = uv.y; if(vr.w > uv.w + 1) vr.w = uv.w + 1; break; case CLAMP_REGION_REPEAT: // TODO break; default: __assume(0); } } r = vr; r &= CRect(0, 0, w, h); } public: GSRendererSW(BYTE* base, bool mt, void (*irq)(), int nloophack, const GSRendererSettings& rs, int threads) : GSRendererT(base, mt, irq, nloophack, rs) { m_rl.Create(this, threads); m_tc = new GSTextureCacheSW(this); InitVertexKick >(); } virtual ~GSRendererSW() { delete m_tc; } template void VertexKick(bool skip) { const GSDrawingContext* context = m_context; GSVector4i xy = GSVector4i::load((int)m_v.XYZ.ai32[0]); xy = xy.insert16<3>(m_v.FOG.F); xy = xy.upl16(); xy -= context->XYOFFSET; GSVertexSW v; v.p = GSVector4(xy) * g_pos_scale; v.c = GSVector4(GSVector4i::load((int)m_v.RGBAQ.ai32[0]).u8to32() << 7); if(tme) { float q; if(fst) { v.t = GSVector4(((GSVector4i)m_v.UV).upl16() << (16 - 4)); q = 1.0f; } else { v.t = GSVector4(m_v.ST.S, m_v.ST.T); v.t *= GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH); q = m_v.RGBAQ.Q; } v.t = v.t.xyxy(GSVector4::load(q)); } GSVertexSW& dst = m_vl.AddTail(); dst = v; dst.p.z = (float)min(m_v.XYZ.Z, 0xffffff00); // max value which can survive the DWORD => float => DWORD conversion DWORD count = 0; if(GSVertexSW* v = DrawingKick(skip, count)) { GSVector4 pmin, pmax; switch(prim) { case GS_POINTLIST: pmin = v[0].p; pmax = v[0].p; break; case GS_LINELIST: case GS_LINESTRIP: case GS_SPRITE: pmin = v[0].p.minv(v[1].p); pmax = v[0].p.maxv(v[1].p); break; case GS_TRIANGLELIST: case GS_TRIANGLESTRIP: case GS_TRIANGLEFAN: pmin = v[0].p.minv(v[1].p).minv(v[2].p); pmax = v[0].p.maxv(v[1].p).maxv(v[2].p); break; } GSVector4 scissor = context->scissor.ex; GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy()); switch(prim) { case GS_TRIANGLELIST: case GS_TRIANGLESTRIP: case GS_TRIANGLEFAN: case GS_SPRITE: test |= pmin.ceil() == pmax.ceil(); break; } switch(prim) { case GS_TRIANGLELIST: case GS_TRIANGLESTRIP: case GS_TRIANGLEFAN: // are in line or just two of them are the same (cross product == 0) GSVector4 tmp = (v[1].p - v[0].p) * (v[2].p - v[0].p).yxwz(); test |= tmp == tmp.yxwz(); break; } if(test.mask() & 3) { return; } switch(prim) { case GS_POINTLIST: break; case GS_LINELIST: case GS_LINESTRIP: if(PRIM->IIP == 0) {v[0].c = v[1].c;} break; case GS_TRIANGLELIST: case GS_TRIANGLESTRIP: case GS_TRIANGLEFAN: if(PRIM->IIP == 0) {v[0].c = v[2].c; v[1].c = v[2].c;} break; case GS_SPRITE: break; } if(m_count < 30 && m_count >= 3) { GSVertexSW* v = &m_vertices[m_count - 3]; int tl = 0; int br = 0; bool isquad = false; switch(prim) { case GS_TRIANGLESTRIP: case GS_TRIANGLEFAN: case GS_TRIANGLELIST: isquad = GSVertexSW::IsQuad(v, tl, br); break; } if(isquad) { m_count -= 3; if(m_count > 0) { tl += m_count; br += m_count; Flush(); } if(tl != 0) m_vertices[0] = m_vertices[tl]; if(br != 1) m_vertices[1] = m_vertices[br]; m_count = 2; UINT32 tmp = PRIM->PRIM; PRIM->PRIM = GS_SPRITE; Flush(); PRIM->PRIM = tmp; m_perfmon.Put(GSPerfMon::Quad, 1); return; } } m_count += count; } } };