mirror of https://github.com/PCSX2/pcsx2.git
GS: Mark SW renderer as multi-isa unshared
This commit is contained in:
parent
bc74c9064a
commit
b0da6361cb
|
@ -749,6 +749,7 @@ set(pcsx2GSHeaders
|
|||
GS/Renderers/Null/GSRendererNull.h
|
||||
GS/Renderers/Null/GSTextureNull.h
|
||||
GS/Renderers/HW/GSRendererHW.h
|
||||
GS/Renderers/HW/GSRendererHWMultiISA.cpp
|
||||
GS/Renderers/HW/GSTextureCache.h
|
||||
GS/Renderers/HW/GSTextureReplacements.h
|
||||
GS/Renderers/HW/GSVertexHW.h
|
||||
|
|
|
@ -22,12 +22,12 @@
|
|||
#include "GSGL.h"
|
||||
#include "GSUtil.h"
|
||||
#include "GSExtra.h"
|
||||
#include "Renderers/SW/GSRendererSW.h"
|
||||
#include "Renderers/Null/GSRendererNull.h"
|
||||
#include "Renderers/Null/GSDeviceNull.h"
|
||||
#include "Renderers/HW/GSRendererHW.h"
|
||||
#include "Renderers/HW/GSTextureReplacements.h"
|
||||
#include "GSLzma.h"
|
||||
#include "MultiISA.h"
|
||||
|
||||
#include "common/Console.h"
|
||||
#include "common/FileSystem.h"
|
||||
|
@ -91,7 +91,7 @@ int GSinit()
|
|||
// const type qualifier from all the affected variables.
|
||||
GSinitConfig();
|
||||
|
||||
|
||||
GSVertexSW::InitStatic();
|
||||
|
||||
GSUtil::Init();
|
||||
|
||||
|
@ -262,7 +262,7 @@ static bool DoGSOpen(GSRendererType renderer, u8* basemem)
|
|||
else
|
||||
{
|
||||
const int threads = theApp.GetConfigI("extrathreads");
|
||||
g_gs_renderer = std::make_unique<GSRendererSW>(threads);
|
||||
g_gs_renderer = std::unique_ptr<GSRenderer>(MULTI_ISA_SELECT(makeGSRendererSW)(threads));
|
||||
}
|
||||
}
|
||||
catch (std::exception& ex)
|
||||
|
|
|
@ -56,3 +56,11 @@ public:
|
|||
_aligned_free(p);
|
||||
}
|
||||
};
|
||||
|
||||
/// GSAlignedClass with a virtual destructor.
///
/// Adds nothing to GSAlignedClass<i> except a virtual destructor, so that an object of a
/// derived type can be owned and destroyed through a pointer to this base class
/// (for example through a std::unique_ptr<GSVirtualAlignedClass<32>>).
/// NOTE(review): `i` is presumably the alignment in bytes that GSAlignedClass expects — confirm.
|
||||
template <int i>
|
||||
class GSVirtualAlignedClass : public GSAlignedClass<i>
|
||||
{
|
||||
public:
|
||||
// Virtual so that deleting through a GSVirtualAlignedClass<i>* runs the derived class's destructor.
virtual ~GSVirtualAlignedClass() {}
|
||||
};
|
||||
|
|
|
@ -79,7 +79,7 @@ static int extend(int uv, int size)
|
|||
return size;
|
||||
}
|
||||
|
||||
GIFRegTEX0 GSDrawingContext::GetSizeFixedTEX0(const GSVector4& st, bool linear, bool mipmap)
|
||||
GIFRegTEX0 GSDrawingContext::GetSizeFixedTEX0(const GSVector4& st, bool linear, bool mipmap) const
|
||||
{
|
||||
if (mipmap)
|
||||
return TEX0; // no mipmaping allowed
|
||||
|
|
|
@ -140,7 +140,7 @@ public:
|
|||
return ZBUF.ZMSK == 0 && TEST.ZTE != 0; // ZTE == 0 is bug on the real hardware, write is blocked then
|
||||
}
|
||||
|
||||
GIFRegTEX0 GetSizeFixedTEX0(const GSVector4& st, bool linear, bool mipmap = false);
|
||||
GIFRegTEX0 GetSizeFixedTEX0(const GSVector4& st, bool linear, bool mipmap = false) const;
|
||||
void ComputeFixedTEX0(const GSVector4& st);
|
||||
bool HasFixedTEX0() const { return m_fixed_tex0; }
|
||||
|
||||
|
|
|
@ -83,3 +83,6 @@ extern const ProcessorFeatures g_cpu;
|
|||
#define MULTI_ISA_FRIEND(klass) friend class isa_native::klass;
|
||||
#define MULTI_ISA_SELECT(fn) (isa_native::fn)
|
||||
#endif
|
||||
|
||||
class GSRenderer;
|
||||
MULTI_ISA_DEF(GSRenderer* makeGSRendererSW(int threads);)
|
||||
|
|
|
@ -17,8 +17,6 @@
|
|||
#include "GSRendererHW.h"
|
||||
#include "GSTextureReplacements.h"
|
||||
#include "GS/GSGL.h"
|
||||
#include "GS/Renderers/SW/GSTextureCacheSW.h"
|
||||
#include "GS/Renderers/SW/GSDrawScanline.h"
|
||||
#include "Host.h"
|
||||
#include "common/Align.h"
|
||||
#include "common/StringUtil.h"
|
||||
|
@ -35,6 +33,7 @@ GSRendererHW::GSRendererHW()
|
|||
, m_userhacks_tcoffset_y(0)
|
||||
, m_lod(GSVector2i(0, 0))
|
||||
{
|
||||
MULTI_ISA_SELECT(GSRendererHWPopulateFunctions)(*this);
|
||||
m_mipmap = (GSConfig.HWMipmap >= HWMipmapLevel::Basic);
|
||||
SetTCOffset();
|
||||
|
||||
|
@ -1403,7 +1402,7 @@ void GSRendererHW::Draw()
|
|||
const bool single_page = (delta_p.x <= 64.0f) && (delta_p.y <= 64.0f);
|
||||
|
||||
// We trigger the sw prim render here super early, to avoid creating superfluous render targets.
|
||||
if (CanUseSwPrimRender(no_rt, no_ds, draw_sprite_tex) && SwPrimRender())
|
||||
if (CanUseSwPrimRender(no_rt, no_ds, draw_sprite_tex) && SwPrimRender(*this))
|
||||
{
|
||||
GL_CACHE("Possible texture decompression, drawn with SwPrimRender()");
|
||||
return;
|
||||
|
@ -1416,7 +1415,7 @@ void GSRendererHW::Draw()
|
|||
m_mem.m_clut.ClearDrawInvalidity();
|
||||
if (result)
|
||||
{
|
||||
if (SwPrimRender())
|
||||
if (SwPrimRender(*this))
|
||||
{
|
||||
GL_CACHE("Possible clut draw, drawn with SwPrimRender()");
|
||||
return;
|
||||
|
@ -4118,423 +4117,6 @@ bool GSRendererHW::CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_t
|
|||
return true;
|
||||
}
|
||||
|
||||
bool GSRendererHW::SwPrimRender()
|
||||
{
|
||||
const GSDrawingContext* context = m_context;
|
||||
const GSDrawingEnvironment& env = m_env;
|
||||
const GS_PRIM_CLASS primclass = m_vt.m_primclass;
|
||||
|
||||
GSDrawScanline::SharedData data;
|
||||
GSScanlineGlobalData& gd = data.global;
|
||||
|
||||
u32 clut_storage[256] = {0};
|
||||
GSVector4i dimx_storage[8];
|
||||
|
||||
m_sw_vertex_buffer.resize(((m_vertex.next + 1) & ~1));
|
||||
|
||||
data.primclass = m_vt.m_primclass;
|
||||
data.buff = nullptr;
|
||||
data.vertex = m_sw_vertex_buffer.data();
|
||||
data.vertex_count = m_vertex.next;
|
||||
data.index = m_index.buff;
|
||||
data.index_count = m_index.tail;
|
||||
data.scanmsk_value = m_env.SCANMSK.MSK;
|
||||
|
||||
// Skip per pixel division if q is constant.
|
||||
// Optimize the division by 1 with a nop. It also means that GS_SPRITE_CLASS must be processed when !m_vt.m_eq.q.
|
||||
// If you have both GS_SPRITE_CLASS && m_vt.m_eq.q, it will depend on the first part of the 'OR'.
|
||||
const u32 q_div = ((m_vt.m_eq.q && m_vt.m_min.t.z != 1.0f) || (!m_vt.m_eq.q && m_vt.m_primclass == GS_SPRITE_CLASS));
|
||||
GSVertexSW::s_cvb[m_vt.m_primclass][PRIM->TME][PRIM->FST][q_div](m_context, data.vertex, m_vertex.buff, m_vertex.next);
|
||||
|
||||
GSVector4i scissor = GSVector4i(m_context->scissor.in);
|
||||
GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil()));
|
||||
|
||||
// Points and lines may have zero area bbox (single line: 0, 0 - 256, 0)
|
||||
|
||||
if (m_vt.m_primclass == GS_POINT_CLASS || m_vt.m_primclass == GS_LINE_CLASS)
|
||||
{
|
||||
if (bbox.x == bbox.z)
|
||||
bbox.z++;
|
||||
if (bbox.y == bbox.w)
|
||||
bbox.w++;
|
||||
}
|
||||
|
||||
data.scissor = scissor;
|
||||
data.bbox = bbox;
|
||||
data.frame = g_perfmon.GetFrame();
|
||||
|
||||
gd.vm = m_mem.m_vm8;
|
||||
|
||||
gd.fbo = context->offset.fb;
|
||||
gd.zbo = context->offset.zb;
|
||||
gd.fzbr = context->offset.fzb4->row;
|
||||
gd.fzbc = context->offset.fzb4->col;
|
||||
|
||||
gd.sel.key = 0;
|
||||
|
||||
gd.sel.fpsm = 3;
|
||||
gd.sel.zpsm = 3;
|
||||
gd.sel.atst = ATST_ALWAYS;
|
||||
gd.sel.tfx = TFX_NONE;
|
||||
gd.sel.ababcd = 0xff;
|
||||
gd.sel.prim = primclass;
|
||||
|
||||
u32 fm = context->FRAME.FBMSK;
|
||||
u32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
|
||||
const u32 fm_mask = GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk;
|
||||
|
||||
// When the format is 24bit (Z or C), DATE ceases to function.
|
||||
// It was believed that in 24bit mode all pixels pass because alpha doesn't exist
|
||||
// however after testing this on a PS2 it turns out nothing passes, it ignores the draw.
|
||||
if ((m_context->FRAME.PSM & 0xF) == PSM_PSMCT24 && m_context->TEST.DATE)
|
||||
{
|
||||
//DevCon.Warning("DATE on a 24bit format, Frame PSM %x", m_context->FRAME.PSM);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (context->TEST.ZTE && context->TEST.ZTST == ZTST_NEVER)
|
||||
{
|
||||
fm = 0xffffffff;
|
||||
zm = 0xffffffff;
|
||||
}
|
||||
|
||||
if (PRIM->TME)
|
||||
{
|
||||
if (GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
|
||||
{
|
||||
m_mem.m_clut.Read32(context->TEX0, env.TEXA);
|
||||
}
|
||||
}
|
||||
|
||||
if (context->TEST.ATE)
|
||||
{
|
||||
if (!TryAlphaTest(fm, fm_mask, zm))
|
||||
{
|
||||
gd.sel.atst = context->TEST.ATST;
|
||||
gd.sel.afail = context->TEST.AFAIL;
|
||||
|
||||
gd.aref = GSVector4i((int)context->TEST.AREF);
|
||||
|
||||
switch (gd.sel.atst)
|
||||
{
|
||||
case ATST_LESS:
|
||||
gd.sel.atst = ATST_LEQUAL;
|
||||
gd.aref -= GSVector4i::x00000001();
|
||||
break;
|
||||
case ATST_GREATER:
|
||||
gd.sel.atst = ATST_GEQUAL;
|
||||
gd.aref += GSVector4i::x00000001();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const bool fwrite = (fm & fm_mask) != fm_mask;
|
||||
const bool ftest = gd.sel.atst != ATST_ALWAYS || context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;
|
||||
|
||||
const bool zwrite = zm != 0xffffffff;
|
||||
const bool ztest = context->TEST.ZTE && context->TEST.ZTST > ZTST_ALWAYS;
|
||||
if (!fwrite && !zwrite)
|
||||
return false;
|
||||
|
||||
gd.sel.fwrite = fwrite;
|
||||
gd.sel.ftest = ftest;
|
||||
|
||||
if (fwrite || ftest)
|
||||
{
|
||||
gd.sel.fpsm = GSLocalMemory::m_psm[context->FRAME.PSM].fmt;
|
||||
|
||||
if ((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt.m_eq.rgba != 0xffff)
|
||||
{
|
||||
gd.sel.iip = PRIM->IIP;
|
||||
}
|
||||
|
||||
if (PRIM->TME)
|
||||
{
|
||||
gd.sel.tfx = context->TEX0.TFX;
|
||||
gd.sel.tcc = context->TEX0.TCC;
|
||||
gd.sel.fst = PRIM->FST;
|
||||
gd.sel.ltf = m_vt.IsLinear();
|
||||
|
||||
if (GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
|
||||
{
|
||||
gd.sel.tlu = 1;
|
||||
|
||||
gd.clut = clut_storage; // FIXME: might address uninitialized data of the texture (0xCD) that is not in 0-15 range for 4-bpp formats
|
||||
|
||||
memcpy(gd.clut, (const u32*)m_mem.m_clut, sizeof(u32) * GSLocalMemory::m_psm[context->TEX0.PSM].pal);
|
||||
}
|
||||
|
||||
gd.sel.wms = context->CLAMP.WMS;
|
||||
gd.sel.wmt = context->CLAMP.WMT;
|
||||
|
||||
if (gd.sel.tfx == TFX_MODULATE && gd.sel.tcc && m_vt.m_eq.rgba == 0xffff && m_vt.m_min.c.eq(GSVector4i(128)))
|
||||
{
|
||||
// modulate does not do anything when vertex color is 0x80
|
||||
|
||||
gd.sel.tfx = TFX_DECAL;
|
||||
}
|
||||
|
||||
GIFRegTEX0 TEX0 = m_context->GetSizeFixedTEX0(m_vt.m_min.t.xyxy(m_vt.m_max.t), m_vt.IsLinear(), false);
|
||||
|
||||
const GSVector4i r = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf).coverage;
|
||||
|
||||
if (!m_sw_texture)
|
||||
m_sw_texture = std::make_unique<GSTextureCacheSW::Texture>(0, TEX0, env.TEXA);
|
||||
else
|
||||
m_sw_texture->Reset(0, TEX0, env.TEXA);
|
||||
|
||||
m_sw_texture->Update(r);
|
||||
gd.tex[0] = m_sw_texture->m_buff;
|
||||
|
||||
gd.sel.tw = m_sw_texture->m_tw - 3;
|
||||
|
||||
{
|
||||
// skip per pixel division if q is constant. Sprite uses flat
|
||||
// q, so it's always constant by primitive.
|
||||
// Note: the 'q' division was done in GSRendererSW::ConvertVertexBuffer
|
||||
gd.sel.fst |= (m_vt.m_eq.q || primclass == GS_SPRITE_CLASS);
|
||||
|
||||
if (gd.sel.ltf && gd.sel.fst)
|
||||
{
|
||||
// if q is constant we can do the half pel shift for bilinear sampling on the vertices
|
||||
// TODO: but not when mipmapping is used!!!
|
||||
|
||||
GSVertexSW* RESTRICT v = data.vertex;
|
||||
const GSVector4 half(0x8000, 0x8000);
|
||||
for (int i = 0, j = data.vertex_count; i < j; i++)
|
||||
{
|
||||
const GSVector4 t = v[i].t;
|
||||
v[i].t = (t - half).xyzw(t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
u16 tw = 1u << TEX0.TW;
|
||||
u16 th = 1u << TEX0.TH;
|
||||
|
||||
if (tw > 1024)
|
||||
tw = 1;
|
||||
|
||||
if (th > 1024)
|
||||
th = 1;
|
||||
|
||||
switch (context->CLAMP.WMS)
|
||||
{
|
||||
case CLAMP_REPEAT:
|
||||
gd.t.min.U16[0] = gd.t.minmax.U16[0] = tw - 1;
|
||||
gd.t.max.U16[0] = gd.t.minmax.U16[2] = 0;
|
||||
gd.t.mask.U32[0] = 0xffffffff;
|
||||
break;
|
||||
case CLAMP_CLAMP:
|
||||
gd.t.min.U16[0] = gd.t.minmax.U16[0] = 0;
|
||||
gd.t.max.U16[0] = gd.t.minmax.U16[2] = tw - 1;
|
||||
gd.t.mask.U32[0] = 0;
|
||||
break;
|
||||
case CLAMP_REGION_CLAMP:
|
||||
// REGION_CLAMP ignores the actual texture size
|
||||
gd.t.min.U16[0] = gd.t.minmax.U16[0] = context->CLAMP.MINU;
|
||||
gd.t.max.U16[0] = gd.t.minmax.U16[2] = context->CLAMP.MAXU;
|
||||
gd.t.mask.U32[0] = 0;
|
||||
break;
|
||||
case CLAMP_REGION_REPEAT:
|
||||
// MINU is restricted to MINU or texture size, whichever is smaller, MAXU is an offset in the texture.
|
||||
gd.t.min.U16[0] = gd.t.minmax.U16[0] = context->CLAMP.MINU & (tw - 1);
|
||||
gd.t.max.U16[0] = gd.t.minmax.U16[2] = context->CLAMP.MAXU;
|
||||
gd.t.mask.U32[0] = 0xffffffff;
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
switch (context->CLAMP.WMT)
|
||||
{
|
||||
case CLAMP_REPEAT:
|
||||
gd.t.min.U16[4] = gd.t.minmax.U16[1] = th - 1;
|
||||
gd.t.max.U16[4] = gd.t.minmax.U16[3] = 0;
|
||||
gd.t.mask.U32[2] = 0xffffffff;
|
||||
break;
|
||||
case CLAMP_CLAMP:
|
||||
gd.t.min.U16[4] = gd.t.minmax.U16[1] = 0;
|
||||
gd.t.max.U16[4] = gd.t.minmax.U16[3] = th - 1;
|
||||
gd.t.mask.U32[2] = 0;
|
||||
break;
|
||||
case CLAMP_REGION_CLAMP:
|
||||
// REGION_CLAMP ignores the actual texture size
|
||||
gd.t.min.U16[4] = gd.t.minmax.U16[1] = context->CLAMP.MINV;
|
||||
gd.t.max.U16[4] = gd.t.minmax.U16[3] = context->CLAMP.MAXV; // ffx anima summon scene, when the anchor appears (th = 256, maxv > 256)
|
||||
gd.t.mask.U32[2] = 0;
|
||||
break;
|
||||
case CLAMP_REGION_REPEAT:
|
||||
// MINV is restricted to MINV or texture size, whichever is smaller, MAXV is an offset in the texture.
|
||||
gd.t.min.U16[4] = gd.t.minmax.U16[1] = context->CLAMP.MINV & (th - 1); // skygunner main menu water texture 64x64, MINV = 127
|
||||
gd.t.max.U16[4] = gd.t.minmax.U16[3] = context->CLAMP.MAXV;
|
||||
gd.t.mask.U32[2] = 0xffffffff;
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
gd.t.min = gd.t.min.xxxxlh();
|
||||
gd.t.max = gd.t.max.xxxxlh();
|
||||
gd.t.mask = gd.t.mask.xxzz();
|
||||
gd.t.invmask = ~gd.t.mask;
|
||||
}
|
||||
|
||||
if (PRIM->FGE)
|
||||
{
|
||||
gd.sel.fge = 1;
|
||||
|
||||
gd.frb = env.FOGCOL.U32[0] & 0x00ff00ff;
|
||||
gd.fga = (env.FOGCOL.U32[0] >> 8) & 0x00ff00ff;
|
||||
}
|
||||
|
||||
if (context->FRAME.PSM != PSM_PSMCT24)
|
||||
{
|
||||
gd.sel.date = context->TEST.DATE;
|
||||
gd.sel.datm = context->TEST.DATM;
|
||||
}
|
||||
|
||||
if (!IsOpaque())
|
||||
{
|
||||
gd.sel.abe = PRIM->ABE;
|
||||
gd.sel.ababcd = context->ALPHA.U32[0];
|
||||
|
||||
if (env.PABE.PABE)
|
||||
{
|
||||
gd.sel.pabe = 1;
|
||||
}
|
||||
|
||||
if (PRIM->AA1 && (primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS))
|
||||
{
|
||||
gd.sel.aa1 = 1;
|
||||
}
|
||||
|
||||
gd.afix = GSVector4i((int)context->ALPHA.FIX << 7).xxzzlh();
|
||||
}
|
||||
|
||||
const u32 masked_fm = fm & fm_mask;
|
||||
if (gd.sel.date
|
||||
|| gd.sel.aba == 1 || gd.sel.abb == 1 || gd.sel.abc == 1 || gd.sel.abd == 1
|
||||
|| gd.sel.atst != ATST_ALWAYS && gd.sel.afail == AFAIL_RGB_ONLY
|
||||
|| gd.sel.fpsm == 0 && masked_fm != 0 && masked_fm != fm_mask
|
||||
|| gd.sel.fpsm == 1 && masked_fm != 0 && masked_fm != fm_mask
|
||||
|| gd.sel.fpsm == 2 && masked_fm != 0 && masked_fm != fm_mask)
|
||||
{
|
||||
gd.sel.rfb = 1;
|
||||
}
|
||||
|
||||
gd.sel.colclamp = env.COLCLAMP.CLAMP;
|
||||
gd.sel.fba = context->FBA.FBA;
|
||||
|
||||
if (env.DTHE.DTHE)
|
||||
{
|
||||
gd.sel.dthe = 1;
|
||||
|
||||
gd.dimx = dimx_storage;
|
||||
|
||||
memcpy(gd.dimx, env.dimx, sizeof(env.dimx));
|
||||
}
|
||||
}
|
||||
|
||||
gd.sel.zwrite = zwrite;
|
||||
gd.sel.ztest = ztest;
|
||||
|
||||
if (zwrite || ztest)
|
||||
{
|
||||
const u32 z_max = 0xffffffff >> (GSLocalMemory::m_psm[context->ZBUF.PSM].fmt * 8);
|
||||
|
||||
gd.sel.zpsm = GSLocalMemory::m_psm[context->ZBUF.PSM].fmt;
|
||||
gd.sel.ztst = ztest ? context->TEST.ZTST : (int)ZTST_ALWAYS;
|
||||
gd.sel.zequal = !!m_vt.m_eq.z;
|
||||
gd.sel.zoverflow = (u32)GSVector4i(m_vt.m_max.p).z == 0x80000000U;
|
||||
gd.sel.zclamp = (u32)GSVector4i(m_vt.m_max.p).z > z_max;
|
||||
}
|
||||
|
||||
#if _M_SSE >= 0x501
|
||||
|
||||
gd.fm = fm;
|
||||
gd.zm = zm;
|
||||
|
||||
if (gd.sel.fpsm == 1)
|
||||
{
|
||||
gd.fm |= 0xff000000;
|
||||
}
|
||||
else if (gd.sel.fpsm == 2)
|
||||
{
|
||||
u32 rb = gd.fm & 0x00f800f8;
|
||||
u32 ga = gd.fm & 0x8000f800;
|
||||
|
||||
gd.fm = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3) | 0xffff0000;
|
||||
}
|
||||
|
||||
if (gd.sel.zpsm == 1)
|
||||
{
|
||||
gd.zm |= 0xff000000;
|
||||
}
|
||||
else if (gd.sel.zpsm == 2)
|
||||
{
|
||||
gd.zm |= 0xffff0000;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
gd.fm = GSVector4i(fm);
|
||||
gd.zm = GSVector4i(zm);
|
||||
|
||||
if (gd.sel.fpsm == 1)
|
||||
{
|
||||
gd.fm |= GSVector4i::xff000000();
|
||||
}
|
||||
else if (gd.sel.fpsm == 2)
|
||||
{
|
||||
GSVector4i rb = gd.fm & 0x00f800f8;
|
||||
GSVector4i ga = gd.fm & 0x8000f800;
|
||||
|
||||
gd.fm = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3) | GSVector4i::xffff0000();
|
||||
}
|
||||
|
||||
if (gd.sel.zpsm == 1)
|
||||
{
|
||||
gd.zm |= GSVector4i::xff000000();
|
||||
}
|
||||
else if (gd.sel.zpsm == 2)
|
||||
{
|
||||
gd.zm |= GSVector4i::xffff0000();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
if (gd.sel.prim == GS_SPRITE_CLASS && !gd.sel.ftest && !gd.sel.ztest && data.bbox.eq(data.bbox.rintersect(data.scissor))) // TODO: check scissor horizontally only
|
||||
{
|
||||
gd.sel.notest = 1;
|
||||
|
||||
const u32 ofx = context->XYOFFSET.OFX;
|
||||
|
||||
for (int i = 0, j = m_vertex.tail; i < j; i++)
|
||||
{
|
||||
#if _M_SSE >= 0x501
|
||||
if ((((m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 7) // aligned to 8
|
||||
#else
|
||||
if ((((m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 3) // aligned to 4
|
||||
#endif
|
||||
{
|
||||
gd.sel.notest = 0;
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!m_sw_rasterizer)
|
||||
m_sw_rasterizer = std::make_unique<GSRasterizer>(new GSDrawScanline(), 0, 1);
|
||||
|
||||
m_sw_rasterizer->Draw(&data);
|
||||
|
||||
m_tc->InvalidateVideoMem(context->offset.fb, bbox);
|
||||
return true;
|
||||
}
|
||||
|
||||
// hacks
|
||||
|
||||
GSRendererHW::Hacks::Hacks()
|
||||
|
|
|
@ -20,11 +20,15 @@
|
|||
#include "GS/Renderers/Common/GSRenderer.h"
|
||||
#include "GS/Renderers/SW/GSTextureCacheSW.h"
|
||||
#include "GS/GSState.h"
|
||||
#include "GS/MultiISA.h"
|
||||
|
||||
class GSRasterizer;
|
||||
class GSRendererHW;
|
||||
MULTI_ISA_DEF(class GSRendererHWFunctions;)
|
||||
MULTI_ISA_DEF(void GSRendererHWPopulateFunctions(GSRendererHW& renderer);)
|
||||
|
||||
class GSRendererHW : public GSRenderer
|
||||
{
|
||||
MULTI_ISA_FRIEND(GSRendererHWFunctions);
|
||||
public:
|
||||
static constexpr int MAX_FRAMEBUFFER_HEIGHT = 1280;
|
||||
|
||||
|
@ -130,7 +134,7 @@ private:
|
|||
bool PossibleCLUTDraw();
|
||||
bool PossibleCLUTDrawAggressive();
|
||||
bool CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_tex);
|
||||
bool SwPrimRender();
|
||||
bool (*SwPrimRender)(GSRendererHW&);
|
||||
|
||||
template <bool linear>
|
||||
void RoundSpriteOffset();
|
||||
|
@ -166,7 +170,7 @@ private:
|
|||
// software sprite renderer state
|
||||
std::vector<GSVertexSW> m_sw_vertex_buffer;
|
||||
std::unique_ptr<GSTextureCacheSW::Texture> m_sw_texture;
|
||||
std::unique_ptr<GSRasterizer> m_sw_rasterizer;
|
||||
std::unique_ptr<GSVirtualAlignedClass<32>> m_sw_rasterizer;
|
||||
|
||||
public:
|
||||
GSRendererHW();
|
||||
|
|
|
@ -0,0 +1,461 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2022 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "GSRendererHW.h"
|
||||
|
||||
#include "GS/Renderers/SW/GSTextureCacheSW.h"
|
||||
#include "GS/Renderers/SW/GSDrawScanline.h"
|
||||
|
||||
// Per-ISA helper for GSRendererHW, declared inside the CURRENT_ISA namespace.
// It groups the ISA-specific functions of the hardware renderer and installs them
// into a GSRendererHW instance as function pointers.
class CURRENT_ISA::GSRendererHWFunctions
|
||||
{
|
||||
public:
|
||||
// Software path that draws hw's current primitives with GSRasterizer/GSDrawScanline.
// Only declared here; the definition follows later in this file.
static bool SwPrimRender(GSRendererHW& hw);
|
||||
|
||||
// Points renderer.SwPrimRender at this ISA's SwPrimRender implementation.
static void Populate(GSRendererHW& renderer)
|
||||
{
|
||||
renderer.SwPrimRender = SwPrimRender;
|
||||
}
|
||||
};
|
||||
|
||||
MULTI_ISA_UNSHARED_IMPL;
|
||||
|
||||
// Free-function entry point for this ISA: fills in the ISA-specific function pointers of
// `renderer` by forwarding to GSRendererHWFunctions::Populate.
// Declared through MULTI_ISA_DEF in GSRendererHW.h and called from the GSRendererHW
// constructor via MULTI_ISA_SELECT.
void CURRENT_ISA::GSRendererHWPopulateFunctions(GSRendererHW& renderer)
|
||||
{
|
||||
GSRendererHWFunctions::Populate(renderer);
|
||||
}
|
||||
|
||||
bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw)
|
||||
{
|
||||
GSVertexTrace& vt = hw.m_vt;
|
||||
const GIFRegPRIM* PRIM = hw.PRIM;
|
||||
const GSDrawingContext* context = hw.m_context;
|
||||
const GSDrawingEnvironment& env = hw.m_env;
|
||||
const GS_PRIM_CLASS primclass = vt.m_primclass;
|
||||
|
||||
GSDrawScanline::SharedData data;
|
||||
GSScanlineGlobalData& gd = data.global;
|
||||
|
||||
u32 clut_storage[256];
|
||||
GSVector4i dimx_storage[8];
|
||||
|
||||
hw.m_sw_vertex_buffer.resize(((hw.m_vertex.next + 1) & ~1));
|
||||
|
||||
data.primclass = vt.m_primclass;
|
||||
data.buff = nullptr;
|
||||
data.vertex = hw.m_sw_vertex_buffer.data();
|
||||
data.vertex_count = hw.m_vertex.next;
|
||||
data.index = hw.m_index.buff;
|
||||
data.index_count = hw.m_index.tail;
|
||||
data.scanmsk_value = hw.m_env.SCANMSK.MSK;
|
||||
|
||||
// Skip per pixel division if q is constant.
|
||||
// Optimize the division by 1 with a nop. It also means that GS_SPRITE_CLASS must be processed when !vt.m_eq.q.
|
||||
// If you have both GS_SPRITE_CLASS && vt.m_eq.q, it will depend on the first part of the 'OR'.
|
||||
const u32 q_div = ((vt.m_eq.q && vt.m_min.t.z != 1.0f) || (!vt.m_eq.q && vt.m_primclass == GS_SPRITE_CLASS));
|
||||
GSVertexSW::s_cvb[vt.m_primclass][PRIM->TME][PRIM->FST][q_div](context, data.vertex, hw.m_vertex.buff, hw.m_vertex.next);
|
||||
|
||||
GSVector4i scissor = GSVector4i(context->scissor.in);
|
||||
GSVector4i bbox = GSVector4i(vt.m_min.p.floor().xyxy(vt.m_max.p.ceil()));
|
||||
|
||||
// Points and lines may have zero area bbox (single line: 0, 0 - 256, 0)
|
||||
|
||||
if (vt.m_primclass == GS_POINT_CLASS || vt.m_primclass == GS_LINE_CLASS)
|
||||
{
|
||||
if (bbox.x == bbox.z)
|
||||
bbox.z++;
|
||||
if (bbox.y == bbox.w)
|
||||
bbox.w++;
|
||||
}
|
||||
|
||||
data.scissor = scissor;
|
||||
data.bbox = bbox;
|
||||
data.frame = g_perfmon.GetFrame();
|
||||
|
||||
gd.vm = hw.m_mem.m_vm8;
|
||||
|
||||
gd.fbo = context->offset.fb;
|
||||
gd.zbo = context->offset.zb;
|
||||
gd.fzbr = context->offset.fzb4->row;
|
||||
gd.fzbc = context->offset.fzb4->col;
|
||||
|
||||
gd.sel.key = 0;
|
||||
|
||||
gd.sel.fpsm = 3;
|
||||
gd.sel.zpsm = 3;
|
||||
gd.sel.atst = ATST_ALWAYS;
|
||||
gd.sel.tfx = TFX_NONE;
|
||||
gd.sel.ababcd = 0xff;
|
||||
gd.sel.prim = primclass;
|
||||
|
||||
u32 fm = context->FRAME.FBMSK;
|
||||
u32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
|
||||
const u32 fm_mask = GSLocalMemory::m_psm[context->FRAME.PSM].fmsk;
|
||||
|
||||
// When the format is 24bit (Z or C), DATE ceases to function.
|
||||
// It was believed that in 24bit mode all pixels pass because alpha doesn't exist
|
||||
// however after testing this on a PS2 it turns out nothing passes, it ignores the draw.
|
||||
if ((context->FRAME.PSM & 0xF) == PSM_PSMCT24 && context->TEST.DATE)
|
||||
{
|
||||
//DevCon.Warning("DATE on a 24bit format, Frame PSM %x", context->FRAME.PSM);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (context->TEST.ZTE && context->TEST.ZTST == ZTST_NEVER)
|
||||
{
|
||||
fm = 0xffffffff;
|
||||
zm = 0xffffffff;
|
||||
}
|
||||
|
||||
if (PRIM->TME)
|
||||
{
|
||||
if (GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
|
||||
{
|
||||
hw.m_mem.m_clut.Read32(context->TEX0, env.TEXA);
|
||||
}
|
||||
}
|
||||
|
||||
if (context->TEST.ATE)
|
||||
{
|
||||
if (!hw.TryAlphaTest(fm, fm_mask, zm))
|
||||
{
|
||||
gd.sel.atst = context->TEST.ATST;
|
||||
gd.sel.afail = context->TEST.AFAIL;
|
||||
|
||||
gd.aref = GSVector4i((int)context->TEST.AREF);
|
||||
|
||||
switch (gd.sel.atst)
|
||||
{
|
||||
case ATST_LESS:
|
||||
gd.sel.atst = ATST_LEQUAL;
|
||||
gd.aref -= GSVector4i::x00000001();
|
||||
break;
|
||||
case ATST_GREATER:
|
||||
gd.sel.atst = ATST_GEQUAL;
|
||||
gd.aref += GSVector4i::x00000001();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const bool fwrite = (fm & fm_mask) != fm_mask;
|
||||
const bool ftest = gd.sel.atst != ATST_ALWAYS || context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;
|
||||
|
||||
const bool zwrite = zm != 0xffffffff;
|
||||
const bool ztest = context->TEST.ZTE && context->TEST.ZTST > ZTST_ALWAYS;
|
||||
if (!fwrite && !zwrite)
|
||||
return false;
|
||||
|
||||
gd.sel.fwrite = fwrite;
|
||||
gd.sel.ftest = ftest;
|
||||
|
||||
if (fwrite || ftest)
|
||||
{
|
||||
gd.sel.fpsm = GSLocalMemory::m_psm[context->FRAME.PSM].fmt;
|
||||
|
||||
if ((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && vt.m_eq.rgba != 0xffff)
|
||||
{
|
||||
gd.sel.iip = PRIM->IIP;
|
||||
}
|
||||
|
||||
if (PRIM->TME)
|
||||
{
|
||||
gd.sel.tfx = context->TEX0.TFX;
|
||||
gd.sel.tcc = context->TEX0.TCC;
|
||||
gd.sel.fst = PRIM->FST;
|
||||
gd.sel.ltf = vt.IsLinear();
|
||||
|
||||
if (GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
|
||||
{
|
||||
gd.sel.tlu = 1;
|
||||
|
||||
gd.clut = clut_storage; // FIXME: might address uninitialized data of the texture (0xCD) that is not in 0-15 range for 4-bpp formats
|
||||
|
||||
memcpy(gd.clut, (const u32*)hw.m_mem.m_clut, sizeof(u32) * GSLocalMemory::m_psm[context->TEX0.PSM].pal);
|
||||
}
|
||||
|
||||
gd.sel.wms = context->CLAMP.WMS;
|
||||
gd.sel.wmt = context->CLAMP.WMT;
|
||||
|
||||
if (gd.sel.tfx == TFX_MODULATE && gd.sel.tcc && vt.m_eq.rgba == 0xffff && vt.m_min.c.eq(GSVector4i(128)))
|
||||
{
|
||||
// modulate does not do anything when vertex color is 0x80
|
||||
|
||||
gd.sel.tfx = TFX_DECAL;
|
||||
}
|
||||
|
||||
GIFRegTEX0 TEX0 = context->GetSizeFixedTEX0(vt.m_min.t.xyxy(vt.m_max.t), vt.IsLinear(), false);
|
||||
|
||||
const GSVector4i r = hw.GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf).coverage;
|
||||
|
||||
if (!hw.m_sw_texture)
|
||||
hw.m_sw_texture = std::make_unique<GSTextureCacheSW::Texture>(0, TEX0, env.TEXA);
|
||||
else
|
||||
hw.m_sw_texture->Reset(0, TEX0, env.TEXA);
|
||||
|
||||
hw.m_sw_texture->Update(r);
|
||||
gd.tex[0] = hw.m_sw_texture->m_buff;
|
||||
|
||||
gd.sel.tw = hw.m_sw_texture->m_tw - 3;
|
||||
|
||||
{
|
||||
// skip per pixel division if q is constant. Sprite uses flat
|
||||
// q, so it's always constant by primitive.
|
||||
// Note: the 'q' division was done in GSRendererSW::ConvertVertexBuffer
|
||||
gd.sel.fst |= (vt.m_eq.q || primclass == GS_SPRITE_CLASS);
|
||||
|
||||
if (gd.sel.ltf && gd.sel.fst)
|
||||
{
|
||||
// if q is constant we can do the half pel shift for bilinear sampling on the vertices
|
||||
|
||||
// TODO: but not when mipmapping is used!!!
|
||||
|
||||
const GSVector4 half(0x8000, 0x8000);
|
||||
|
||||
GSVertexSW* RESTRICT v = data.vertex;
|
||||
|
||||
for (int i = 0, j = data.vertex_count; i < j; i++)
|
||||
{
|
||||
const GSVector4 t = v[i].t;
|
||||
|
||||
v[i].t = (t - half).xyzw(t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
u16 tw = 1u << TEX0.TW;
|
||||
u16 th = 1u << TEX0.TH;
|
||||
|
||||
if (tw > 1024)
|
||||
tw = 1;
|
||||
|
||||
if (th > 1024)
|
||||
th = 1;
|
||||
|
||||
switch (context->CLAMP.WMS)
|
||||
{
|
||||
case CLAMP_REPEAT:
|
||||
gd.t.min.U16[0] = gd.t.minmax.U16[0] = tw - 1;
|
||||
gd.t.max.U16[0] = gd.t.minmax.U16[2] = 0;
|
||||
gd.t.mask.U32[0] = 0xffffffff;
|
||||
break;
|
||||
case CLAMP_CLAMP:
|
||||
gd.t.min.U16[0] = gd.t.minmax.U16[0] = 0;
|
||||
gd.t.max.U16[0] = gd.t.minmax.U16[2] = tw - 1;
|
||||
gd.t.mask.U32[0] = 0;
|
||||
break;
|
||||
case CLAMP_REGION_CLAMP:
|
||||
// REGION_CLAMP ignores the actual texture size
|
||||
gd.t.min.U16[0] = gd.t.minmax.U16[0] = context->CLAMP.MINU;
|
||||
gd.t.max.U16[0] = gd.t.minmax.U16[2] = context->CLAMP.MAXU;
|
||||
gd.t.mask.U32[0] = 0;
|
||||
break;
|
||||
case CLAMP_REGION_REPEAT:
|
||||
// MINU is restricted to MINU or texture size, whichever is smaller, MAXU is an offset in the texture.
|
||||
gd.t.min.U16[0] = gd.t.minmax.U16[0] = context->CLAMP.MINU & (tw - 1);
|
||||
gd.t.max.U16[0] = gd.t.minmax.U16[2] = context->CLAMP.MAXU;
|
||||
gd.t.mask.U32[0] = 0xffffffff;
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
switch (context->CLAMP.WMT)
|
||||
{
|
||||
case CLAMP_REPEAT:
|
||||
gd.t.min.U16[4] = gd.t.minmax.U16[1] = th - 1;
|
||||
gd.t.max.U16[4] = gd.t.minmax.U16[3] = 0;
|
||||
gd.t.mask.U32[2] = 0xffffffff;
|
||||
break;
|
||||
case CLAMP_CLAMP:
|
||||
gd.t.min.U16[4] = gd.t.minmax.U16[1] = 0;
|
||||
gd.t.max.U16[4] = gd.t.minmax.U16[3] = th - 1;
|
||||
gd.t.mask.U32[2] = 0;
|
||||
break;
|
||||
case CLAMP_REGION_CLAMP:
|
||||
// REGION_CLAMP ignores the actual texture size
|
||||
gd.t.min.U16[4] = gd.t.minmax.U16[1] = context->CLAMP.MINV;
|
||||
gd.t.max.U16[4] = gd.t.minmax.U16[3] = context->CLAMP.MAXV; // ffx anima summon scene, when the anchor appears (th = 256, maxv > 256)
|
||||
gd.t.mask.U32[2] = 0;
|
||||
break;
|
||||
case CLAMP_REGION_REPEAT:
|
||||
// MINV is restricted to MINV or texture size, whichever is smaller, MAXV is an offset in the texture.
|
||||
gd.t.min.U16[4] = gd.t.minmax.U16[1] = context->CLAMP.MINV & (th - 1); // skygunner main menu water texture 64x64, MINV = 127
|
||||
gd.t.max.U16[4] = gd.t.minmax.U16[3] = context->CLAMP.MAXV;
|
||||
gd.t.mask.U32[2] = 0xffffffff;
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
gd.t.min = gd.t.min.xxxxlh();
|
||||
gd.t.max = gd.t.max.xxxxlh();
|
||||
gd.t.mask = gd.t.mask.xxzz();
|
||||
gd.t.invmask = ~gd.t.mask;
|
||||
}
|
||||
|
||||
if (PRIM->FGE)
|
||||
{
|
||||
gd.sel.fge = 1;
|
||||
|
||||
gd.frb = env.FOGCOL.U32[0] & 0x00ff00ff;
|
||||
gd.fga = (env.FOGCOL.U32[0] >> 8) & 0x00ff00ff;
|
||||
}
|
||||
|
||||
if (context->FRAME.PSM != PSM_PSMCT24)
|
||||
{
|
||||
gd.sel.date = context->TEST.DATE;
|
||||
gd.sel.datm = context->TEST.DATM;
|
||||
}
|
||||
|
||||
if (!hw.IsOpaque())
|
||||
{
|
||||
gd.sel.abe = PRIM->ABE;
|
||||
gd.sel.ababcd = context->ALPHA.U32[0];
|
||||
|
||||
if (env.PABE.PABE)
|
||||
{
|
||||
gd.sel.pabe = 1;
|
||||
}
|
||||
|
||||
if (PRIM->AA1 && (primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS))
|
||||
{
|
||||
gd.sel.aa1 = 1;
|
||||
}
|
||||
|
||||
gd.afix = GSVector4i((int)context->ALPHA.FIX << 7).xxzzlh();
|
||||
}
|
||||
|
||||
const u32 masked_fm = fm & fm_mask;
|
||||
if (gd.sel.date
|
||||
|| gd.sel.aba == 1 || gd.sel.abb == 1 || gd.sel.abc == 1 || gd.sel.abd == 1
|
||||
|| gd.sel.atst != ATST_ALWAYS && gd.sel.afail == AFAIL_RGB_ONLY
|
||||
|| gd.sel.fpsm == 0 && masked_fm != 0 && masked_fm != fm_mask
|
||||
|| gd.sel.fpsm == 1 && masked_fm != 0 && masked_fm != fm_mask
|
||||
|| gd.sel.fpsm == 2 && masked_fm != 0 && masked_fm != fm_mask)
|
||||
{
|
||||
gd.sel.rfb = 1;
|
||||
}
|
||||
|
||||
gd.sel.colclamp = env.COLCLAMP.CLAMP;
|
||||
gd.sel.fba = context->FBA.FBA;
|
||||
|
||||
if (env.DTHE.DTHE)
|
||||
{
|
||||
gd.sel.dthe = 1;
|
||||
|
||||
gd.dimx = dimx_storage;
|
||||
|
||||
memcpy(gd.dimx, env.dimx, sizeof(env.dimx));
|
||||
}
|
||||
}
|
||||
|
||||
gd.sel.zwrite = zwrite;
|
||||
gd.sel.ztest = ztest;
|
||||
|
||||
if (zwrite || ztest)
|
||||
{
|
||||
const u32 z_max = 0xffffffff >> (GSLocalMemory::m_psm[context->ZBUF.PSM].fmt * 8);
|
||||
|
||||
gd.sel.zpsm = GSLocalMemory::m_psm[context->ZBUF.PSM].fmt;
|
||||
gd.sel.ztst = ztest ? context->TEST.ZTST : (int)ZTST_ALWAYS;
|
||||
gd.sel.zequal = !!vt.m_eq.z;
|
||||
gd.sel.zoverflow = (u32)GSVector4i(vt.m_max.p).z == 0x80000000U;
|
||||
gd.sel.zclamp = (u32)GSVector4i(vt.m_max.p).z > z_max;
|
||||
}
|
||||
|
||||
#if _M_SSE >= 0x501
|
||||
|
||||
gd.fm = fm;
|
||||
gd.zm = zm;
|
||||
|
||||
if (gd.sel.fpsm == 1)
|
||||
{
|
||||
gd.fm |= 0xff000000;
|
||||
}
|
||||
else if (gd.sel.fpsm == 2)
|
||||
{
|
||||
u32 rb = gd.fm & 0x00f800f8;
|
||||
u32 ga = gd.fm & 0x8000f800;
|
||||
|
||||
gd.fm = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3) | 0xffff0000;
|
||||
}
|
||||
|
||||
if (gd.sel.zpsm == 1)
|
||||
{
|
||||
gd.zm |= 0xff000000;
|
||||
}
|
||||
else if (gd.sel.zpsm == 2)
|
||||
{
|
||||
gd.zm |= 0xffff0000;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
gd.fm = GSVector4i(fm);
|
||||
gd.zm = GSVector4i(zm);
|
||||
|
||||
if (gd.sel.fpsm == 1)
|
||||
{
|
||||
gd.fm |= GSVector4i::xff000000();
|
||||
}
|
||||
else if (gd.sel.fpsm == 2)
|
||||
{
|
||||
GSVector4i rb = gd.fm & 0x00f800f8;
|
||||
GSVector4i ga = gd.fm & 0x8000f800;
|
||||
|
||||
gd.fm = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3) | GSVector4i::xffff0000();
|
||||
}
|
||||
|
||||
if (gd.sel.zpsm == 1)
|
||||
{
|
||||
gd.zm |= GSVector4i::xff000000();
|
||||
}
|
||||
else if (gd.sel.zpsm == 2)
|
||||
{
|
||||
gd.zm |= GSVector4i::xffff0000();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
if (gd.sel.prim == GS_SPRITE_CLASS && !gd.sel.ftest && !gd.sel.ztest && data.bbox.eq(data.bbox.rintersect(data.scissor))) // TODO: check scissor horizontally only
|
||||
{
|
||||
gd.sel.notest = 1;
|
||||
|
||||
const u32 ofx = context->XYOFFSET.OFX;
|
||||
|
||||
for (int i = 0, j = hw.m_vertex.tail; i < j; i++)
|
||||
{
|
||||
#if _M_SSE >= 0x501
|
||||
if ((((hw.m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 7) // aligned to 8
|
||||
#else
|
||||
if ((((hw.m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 3) // aligned to 4
|
||||
#endif
|
||||
{
|
||||
gd.sel.notest = 0;
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (!hw.m_sw_rasterizer)
|
||||
hw.m_sw_rasterizer = std::make_unique<GSRasterizer>(new GSDrawScanline(), 0, 1);
|
||||
|
||||
static_cast<GSRasterizer*>(hw.m_sw_rasterizer.get())->Draw(&data);
|
||||
|
||||
hw.m_tc->InvalidateVideoMem(context->offset.fb, bbox);
|
||||
return true;
|
||||
}
|
|
@ -17,8 +17,12 @@
|
|||
#include "GSDrawScanline.h"
|
||||
#include "GSTextureCacheSW.h"
|
||||
|
||||
#if MULTI_ISA_COMPILE_ONCE
|
||||
// Lack of a better home
|
||||
constexpr GSScanlineConstantData g_const;
|
||||
#endif
|
||||
|
||||
MULTI_ISA_UNSHARED_IMPL;
|
||||
|
||||
GSDrawScanline::GSDrawScanline()
|
||||
: m_sp_map("GSSetupPrim", &m_local)
|
||||
|
|
|
@ -21,6 +21,8 @@
|
|||
#include "GSSetupPrimCodeGenerator.h"
|
||||
#include "GSDrawScanlineCodeGenerator.h"
|
||||
|
||||
MULTI_ISA_UNSHARED_START
|
||||
|
||||
class GSDrawScanline : public IDrawScanline
|
||||
{
|
||||
public:
|
||||
|
@ -85,3 +87,5 @@ public:
|
|||
m_ds_map.PrintStats();
|
||||
}
|
||||
};
|
||||
|
||||
MULTI_ISA_UNSHARED_END
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "GS/Renderers/Common/GSFunctionMap.h"
|
||||
#include "GSVertexSW.h"
|
||||
|
||||
MULTI_ISA_UNSHARED_IMPL;
|
||||
using namespace Xbyak;
|
||||
|
||||
// Ease the reading of the code
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
#include "GSScanlineEnvironment.h"
|
||||
#include "GSNewCodeGenerator.h"
|
||||
#include "GS/MultiISA.h"
|
||||
|
||||
#undef _t // Conflict with wx, hopefully no one needs this
|
||||
|
||||
|
@ -30,6 +31,8 @@
|
|||
#define DRAW_SCANLINE_USING_YMM 0
|
||||
#endif
|
||||
|
||||
MULTI_ISA_UNSHARED_START
|
||||
|
||||
class GSDrawScanlineCodeGenerator2 : public GSNewCodeGenerator
|
||||
{
|
||||
using _parent = GSNewCodeGenerator;
|
||||
|
@ -187,3 +190,5 @@ private:
|
|||
int pixels, int mip_offset);
|
||||
void ReadTexelImpl(const Xmm& dst, const Xmm& addr, u8 i, bool texInA3, bool preserveDst);
|
||||
};
|
||||
|
||||
MULTI_ISA_UNSHARED_END
|
||||
|
|
|
@ -21,11 +21,13 @@
|
|||
#include <map>
|
||||
#include <mutex>
|
||||
|
||||
static std::map<u64, bool> s_use_c_draw_scanline;
|
||||
static std::mutex s_use_c_draw_scanline_mutex;
|
||||
MULTI_ISA_UNSHARED_IMPL;
|
||||
|
||||
static bool shouldUseCDrawScanline(u64 key)
|
||||
{
|
||||
static std::map<u64, bool> s_use_c_draw_scanline;
|
||||
static std::mutex s_use_c_draw_scanline_mutex;
|
||||
|
||||
static const char* const fname = getenv("USE_C_DRAW_SCANLINE");
|
||||
if (!fname)
|
||||
return false;
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "GSScanlineEnvironment.h"
|
||||
#include "GS/Renderers/Common/GSFunctionMap.h"
|
||||
#include "GS/GSUtil.h"
|
||||
#include "GS/MultiISA.h"
|
||||
|
||||
#if defined(_M_AMD64) || defined(_WIN64)
|
||||
#define RegLong Xbyak::Reg64
|
||||
|
@ -25,6 +26,8 @@
|
|||
#define RegLong Xbyak::Reg32
|
||||
#endif
|
||||
|
||||
MULTI_ISA_UNSHARED_START
|
||||
|
||||
class GSDrawScanlineCodeGenerator : public GSCodeGenerator
|
||||
{
|
||||
void operator=(const GSDrawScanlineCodeGenerator&);
|
||||
|
@ -36,3 +39,5 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
|
|||
public:
|
||||
GSDrawScanlineCodeGenerator(void* param, u64 key, void* code, size_t maxsize);
|
||||
};
|
||||
|
||||
MULTI_ISA_UNSHARED_END
|
||||
|
|
|
@ -27,6 +27,8 @@
|
|||
|
||||
#define ENABLE_DRAW_STATS 0
|
||||
|
||||
MULTI_ISA_UNSHARED_IMPL;
|
||||
|
||||
int GSRasterizerData::s_counter = 0;
|
||||
|
||||
static int compute_best_thread_height(int threads)
|
||||
|
|
|
@ -21,6 +21,9 @@
|
|||
#include "GS/GSPerfMon.h"
|
||||
#include "GS/GSThread_CXX11.h"
|
||||
#include "GS/GSRingHeap.h"
|
||||
#include "GS/MultiISA.h"
|
||||
|
||||
MULTI_ISA_UNSHARED_START
|
||||
|
||||
class alignas(32) GSRasterizerData : public GSAlignedClass<32>
|
||||
{
|
||||
|
@ -113,7 +116,7 @@ public:
|
|||
__forceinline bool IsSolidRect() const { return m_dr != NULL; }
|
||||
};
|
||||
|
||||
class IRasterizer : public GSAlignedClass<32>
|
||||
class IRasterizer : public GSVirtualAlignedClass<32>
|
||||
{
|
||||
public:
|
||||
virtual ~IRasterizer() {}
|
||||
|
@ -234,3 +237,5 @@ public:
|
|||
int GetPixels(bool reset);
|
||||
void PrintStats() {}
|
||||
};
|
||||
|
||||
MULTI_ISA_UNSHARED_END
|
||||
|
|
|
@ -18,14 +18,18 @@
|
|||
#include "GS/GSGL.h"
|
||||
#include "common/StringUtil.h"
|
||||
|
||||
MULTI_ISA_UNSHARED_IMPL;
|
||||
|
||||
/// Factory for the software renderer compiled for this ISA variant.
/// Heap-allocates a GSRendererSW, forwarding `threads` to its constructor,
/// and returns it through the GSRenderer base pointer. The result is a raw
/// pointer created with `new`; the call site in GSinit's DoGSOpen wraps it
/// in a std::unique_ptr<GSRenderer>.
GSRenderer* CURRENT_ISA::makeGSRendererSW(int threads)
|
||||
{
|
||||
return new GSRendererSW(threads);
|
||||
}
|
||||
|
||||
#define LOG 0
|
||||
|
||||
static FILE* s_fp = LOG ? fopen("c:\\temp1\\_.txt", "w") : NULL;
|
||||
|
||||
CONSTINIT const GSVector4 GSVertexSW::m_pos_scale = GSVector4::cxpr(1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
|
||||
#if _M_SSE >= 0x501
|
||||
CONSTINIT const GSVector8 GSVertexSW::m_pos_scale2 = GSVector8::cxpr(1.0f / 16, 1.0f / 16, 1.0f, 128.0f, 1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
|
||||
#endif
|
||||
static constexpr GSVector4 s_pos_scale = GSVector4::cxpr(1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
|
||||
|
||||
GSRendererSW::GSRendererSW(int threads)
|
||||
: GSRenderer(), m_fzb(NULL)
|
||||
|
@ -223,9 +227,20 @@ GSTexture* GSRendererSW::GetFeedbackOutput()
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
MULTI_ISA_DEF(void GSVertexSWInitStatic();)
|
||||
|
||||
#if MULTI_ISA_COMPILE_ONCE
|
||||
GSVertexSW::ConvertVertexBufferPtr GSVertexSW::s_cvb[4][2][2][2];
|
||||
/// One-time static setup for GSVertexSW (called from GSinit).
/// Dispatches through MULTI_ISA_SELECT to a GSVertexSWInitStatic
/// implementation, which is what actually fills GSVertexSW::s_cvb.
void GSVertexSW::InitStatic()
|
||||
{
|
||||
MULTI_ISA_SELECT(GSVertexSWInitStatic)();
|
||||
}
|
||||
#endif
|
||||
|
||||
MULTI_ISA_UNSHARED_START
|
||||
|
||||
template <u32 primclass, u32 tme, u32 fst, u32 q_div>
|
||||
void GSVertexSW::ConvertVertexBuffer(GSDrawingContext* RESTRICT ctx, GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count)
|
||||
void ConvertVertexBuffer(const GSDrawingContext* RESTRICT ctx, GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count)
|
||||
{
|
||||
// FIXME q_div wasn't added to AVX2 code path.
|
||||
|
||||
|
@ -274,7 +289,7 @@ void GSVertexSW::ConvertVertexBuffer(GSDrawingContext* RESTRICT ctx, GSVertexSW*
|
|||
|
||||
if (primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
dst->p = GSVector4(xy).xyyw(GSVector4(xyzuvf)) * m_pos_scale;
|
||||
dst->p = GSVector4(xy).xyyw(GSVector4(xyzuvf)) * s_pos_scale;
|
||||
|
||||
xyzuvf = xyzuvf.min_u32(z_max);
|
||||
t = t.insert32<1, 3>(GSVector4::cast(xyzuvf));
|
||||
|
@ -282,7 +297,7 @@ void GSVertexSW::ConvertVertexBuffer(GSDrawingContext* RESTRICT ctx, GSVertexSW*
|
|||
else
|
||||
{
|
||||
double z = static_cast<double>(static_cast<u32>(xyzuvf.extract32<1>()));
|
||||
dst->p = (GSVector4(xy) * m_pos_scale).upld(GSVector4::f64(z, 0.0));
|
||||
dst->p = (GSVector4(xy) * s_pos_scale).upld(GSVector4::f64(z, 0.0));
|
||||
t = t.blend32<8>(GSVector4(xyzuvf << 7));
|
||||
}
|
||||
|
||||
|
@ -296,22 +311,23 @@ void GSVertexSW::ConvertVertexBuffer(GSDrawingContext* RESTRICT ctx, GSVertexSW*
|
|||
}
|
||||
}
|
||||
|
||||
// clang-format off
|
||||
GSVertexSW::ConvertVertexBufferPtr GSVertexSW::s_cvb[4][2][2][2] = {
|
||||
#define InitCVB3(P, T, F) { &GSVertexSW::ConvertVertexBuffer<P, T, F, 0>, &GSVertexSW::ConvertVertexBuffer<P, T, F, 1> }
|
||||
#define InitCVB2(P, T) { InitCVB3(P, T, 0), InitCVB3(P, T, 1) }
|
||||
#define InitCVB(P) { InitCVB2(static_cast<u32>(P), 0), InitCVB2(static_cast<u32>(P), 1) }
|
||||
|
||||
InitCVB(GS_POINT_CLASS),
|
||||
InitCVB(GS_LINE_CLASS),
|
||||
InitCVB(GS_TRIANGLE_CLASS),
|
||||
InitCVB(GS_SPRITE_CLASS)
|
||||
|
||||
#undef InitCVB
|
||||
/// Fills the GSVertexSW::s_cvb dispatch table for this ISA build.
/// Each slot s_cvb[P][T][F][Q] is assigned the ConvertVertexBuffer<P, T, F, Q>
/// instantiation, where the template parameters are declared as
/// <primclass, tme, fst, q_div>. The InitCVB1..InitCVB4 helper macros expand
/// to the assignments for the four primitive classes times the 2x2x2 flag
/// combinations, and are #undef'd again before the function ends.
void GSVertexSWInitStatic()
|
||||
{
|
||||
#define InitCVB4(P, T, F, Q) GSVertexSW::s_cvb[P][T][F][Q] = ConvertVertexBuffer<P, T, F, Q>;
|
||||
#define InitCVB3(P, T, F) InitCVB4(P, T, F, 0) InitCVB4(P, T, F, 1)
|
||||
#define InitCVB2(P, T) InitCVB3(P, T, 0) InitCVB3(P, T, 1)
|
||||
#define InitCVB1(P) InitCVB2(P, 0) InitCVB2(P, 1)
|
||||
InitCVB1(GS_POINT_CLASS)
|
||||
InitCVB1(GS_LINE_CLASS)
|
||||
InitCVB1(GS_TRIANGLE_CLASS)
|
||||
InitCVB1(GS_SPRITE_CLASS)
|
||||
#undef InitCVB1
|
||||
#undef InitCVB2
|
||||
#undef InitCVB3
|
||||
};
|
||||
// clang-format on
|
||||
#undef InitCVB4
|
||||
}
|
||||
|
||||
MULTI_ISA_UNSHARED_END
|
||||
|
||||
void GSRendererSW::Draw()
|
||||
{
|
||||
|
|
|
@ -18,6 +18,9 @@
|
|||
#include "GSTextureCacheSW.h"
|
||||
#include "GSDrawScanline.h"
|
||||
#include "GS/GSRingHeap.h"
|
||||
#include "GS/MultiISA.h"
|
||||
|
||||
MULTI_ISA_UNSHARED_START
|
||||
|
||||
class GSRendererSW final : public GSRenderer
|
||||
{
|
||||
|
@ -95,3 +98,5 @@ public:
|
|||
|
||||
void Destroy() override;
|
||||
};
|
||||
|
||||
MULTI_ISA_UNSHARED_END
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "GSSetupPrimCodeGenerator.all.h"
|
||||
#include "GSVertexSW.h"
|
||||
|
||||
MULTI_ISA_UNSHARED_IMPL;
|
||||
using namespace Xbyak;
|
||||
|
||||
#define _rip_local(field) ((m_rip) ? ptr[rip + (char*)&m_local.field] : ptr[_m_local + OFFSETOF(GSScanlineLocalData, field)])
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
#include "GSScanlineEnvironment.h"
|
||||
#include "GSNewCodeGenerator.h"
|
||||
#include "GS/MultiISA.h"
|
||||
|
||||
#if _M_SSE >= 0x501
|
||||
#define SETUP_PRIM_VECTOR_REGISTER Xbyak::Ymm
|
||||
|
@ -28,6 +29,8 @@
|
|||
#define SETUP_PRIM_USING_YMM 0
|
||||
#endif
|
||||
|
||||
MULTI_ISA_UNSHARED_START
|
||||
|
||||
class GSSetupPrimCodeGenerator2 : public GSNewCodeGenerator
|
||||
{
|
||||
using _parent = GSNewCodeGenerator;
|
||||
|
@ -77,3 +80,5 @@ private:
|
|||
void Texture();
|
||||
void Color();
|
||||
};
|
||||
|
||||
MULTI_ISA_UNSHARED_END
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
#include "GSSetupPrimCodeGenerator.h"
|
||||
#include "GSSetupPrimCodeGenerator.all.h"
|
||||
|
||||
using namespace Xbyak;
|
||||
MULTI_ISA_UNSHARED_IMPL;
|
||||
|
||||
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, u64 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
|
|
|
@ -18,6 +18,9 @@
|
|||
#include "GSScanlineEnvironment.h"
|
||||
#include "GS/Renderers/Common/GSFunctionMap.h"
|
||||
#include "GS/GSUtil.h"
|
||||
#include "GS/MultiISA.h"
|
||||
|
||||
MULTI_ISA_UNSHARED_START
|
||||
|
||||
class GSSetupPrimCodeGenerator : public GSCodeGenerator
|
||||
{
|
||||
|
@ -35,3 +38,5 @@ class GSSetupPrimCodeGenerator : public GSCodeGenerator
|
|||
public:
|
||||
GSSetupPrimCodeGenerator(void* param, u64 key, void* code, size_t maxsize);
|
||||
};
|
||||
|
||||
MULTI_ISA_UNSHARED_END
|
||||
|
|
|
@ -246,17 +246,11 @@ struct alignas(32) GSVertexSW
|
|||
#endif
|
||||
}
|
||||
|
||||
typedef void (*ConvertVertexBufferPtr)(GSDrawingContext* RESTRICT ctx, GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);
|
||||
typedef void (*ConvertVertexBufferPtr)(const GSDrawingContext* RESTRICT ctx, GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);
|
||||
|
||||
static ConvertVertexBufferPtr s_cvb[4][2][2][2];
|
||||
|
||||
template <u32 primclass, u32 tme, u32 fst, u32 q_div>
|
||||
static void ConvertVertexBuffer(GSDrawingContext* RESTRICT ctx, GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);
|
||||
|
||||
static const GSVector4 m_pos_scale;
|
||||
#if _M_SSE >= 0x501
|
||||
static const GSVector8 m_pos_scale2;
|
||||
#endif
|
||||
static void InitStatic();
|
||||
};
|
||||
|
||||
#if _M_SSE >= 0x501
|
||||
|
|
|
@ -456,6 +456,7 @@
|
|||
<ClCompile Include="GS\Renderers\SW\GSRasterizer.cpp" />
|
||||
<ClCompile Include="GS\Renderers\Common\GSRenderer.cpp" />
|
||||
<ClCompile Include="GS\Renderers\HW\GSRendererHW.cpp" />
|
||||
<ClCompile Include="GS\Renderers\HW\GSRendererHWMultiISA.cpp" />
|
||||
<ClCompile Include="GS\Renderers\Null\GSRendererNull.cpp" />
|
||||
<ClCompile Include="GS\Renderers\SW\GSRendererSW.cpp" />
|
||||
<ClCompile Include="GS\Window\GSSetting.cpp" />
|
||||
|
|
|
@ -1607,6 +1607,9 @@
|
|||
<ClCompile Include="GS\Renderers\HW\GSRendererHW.cpp">
|
||||
<Filter>System\Ps2\GS\Renderers\Hardware</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GS\Renderers\HW\GSRendererHWMultiISA.cpp">
|
||||
<Filter>System\Ps2\GS\Renderers\Hardware</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GS\Renderers\HW\GSTextureCache.cpp">
|
||||
<Filter>System\Ps2\GS\Renderers\Hardware</Filter>
|
||||
</ClCompile>
|
||||
|
|
|
@ -309,6 +309,7 @@
|
|||
<ClCompile Include="GS\Renderers\SW\GSRasterizer.cpp" />
|
||||
<ClCompile Include="GS\Renderers\Common\GSRenderer.cpp" />
|
||||
<ClCompile Include="GS\Renderers\HW\GSRendererHW.cpp" />
|
||||
<ClCompile Include="GS\Renderers\HW\GSRendererHWMultiISA.cpp" />
|
||||
<ClCompile Include="GS\Renderers\Null\GSRendererNull.cpp" />
|
||||
<ClCompile Include="GS\Renderers\SW\GSRendererSW.cpp" />
|
||||
<ClCompile Include="GS\Window\GSSetting.cpp" />
|
||||
|
|
|
@ -1100,6 +1100,9 @@
|
|||
<ClCompile Include="GS\Renderers\HW\GSRendererHW.cpp">
|
||||
<Filter>System\Ps2\GS\Renderers\Hardware</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GS\Renderers\HW\GSRendererHWMultiISA.cpp">
|
||||
<Filter>System\Ps2\GS\Renderers\Hardware</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GS\Renderers\HW\GSTextureCache.cpp">
|
||||
<Filter>System\Ps2\GS\Renderers\Hardware</Filter>
|
||||
</ClCompile>
|
||||
|
|
Loading…
Reference in New Issue