GS: Mark SW renderer as multi-isa unshared

This commit is contained in:
TellowKrinkle 2021-12-18 20:11:24 -06:00 committed by TellowKrinkle
parent bc74c9064a
commit b0da6361cb
28 changed files with 583 additions and 462 deletions

View File

@ -749,6 +749,7 @@ set(pcsx2GSHeaders
GS/Renderers/Null/GSRendererNull.h
GS/Renderers/Null/GSTextureNull.h
GS/Renderers/HW/GSRendererHW.h
GS/Renderers/HW/GSRendererHWMultiISA.cpp
GS/Renderers/HW/GSTextureCache.h
GS/Renderers/HW/GSTextureReplacements.h
GS/Renderers/HW/GSVertexHW.h

View File

@ -22,12 +22,12 @@
#include "GSGL.h"
#include "GSUtil.h"
#include "GSExtra.h"
#include "Renderers/SW/GSRendererSW.h"
#include "Renderers/Null/GSRendererNull.h"
#include "Renderers/Null/GSDeviceNull.h"
#include "Renderers/HW/GSRendererHW.h"
#include "Renderers/HW/GSTextureReplacements.h"
#include "GSLzma.h"
#include "MultiISA.h"
#include "common/Console.h"
#include "common/FileSystem.h"
@ -91,7 +91,7 @@ int GSinit()
// const type qualifier from all the affected variables.
GSinitConfig();
GSVertexSW::InitStatic();
GSUtil::Init();
@ -262,7 +262,7 @@ static bool DoGSOpen(GSRendererType renderer, u8* basemem)
else
{
const int threads = theApp.GetConfigI("extrathreads");
g_gs_renderer = std::make_unique<GSRendererSW>(threads);
g_gs_renderer = std::unique_ptr<GSRenderer>(MULTI_ISA_SELECT(makeGSRendererSW)(threads));
}
}
catch (std::exception& ex)

View File

@ -56,3 +56,11 @@ public:
_aligned_free(p);
}
};
/// Variant of GSAlignedClass<i> that declares a virtual destructor, so an
/// object of a derived type can be destroyed through a pointer to this base.
template <int i>
class GSVirtualAlignedClass : public GSAlignedClass<i>
{
public:
	virtual ~GSVirtualAlignedClass() = default;
};

View File

@ -79,7 +79,7 @@ static int extend(int uv, int size)
return size;
}
GIFRegTEX0 GSDrawingContext::GetSizeFixedTEX0(const GSVector4& st, bool linear, bool mipmap)
GIFRegTEX0 GSDrawingContext::GetSizeFixedTEX0(const GSVector4& st, bool linear, bool mipmap) const
{
if (mipmap)
return TEX0; // no mipmaping allowed

View File

@ -140,7 +140,7 @@ public:
return ZBUF.ZMSK == 0 && TEST.ZTE != 0; // ZTE == 0 is bug on the real hardware, write is blocked then
}
GIFRegTEX0 GetSizeFixedTEX0(const GSVector4& st, bool linear, bool mipmap = false);
GIFRegTEX0 GetSizeFixedTEX0(const GSVector4& st, bool linear, bool mipmap = false) const;
void ComputeFixedTEX0(const GSVector4& st);
bool HasFixedTEX0() const { return m_fixed_tex0; }

View File

@ -83,3 +83,6 @@ extern const ProcessorFeatures g_cpu;
#define MULTI_ISA_FRIEND(klass) friend class isa_native::klass;
#define MULTI_ISA_SELECT(fn) (isa_native::fn)
#endif
class GSRenderer;
MULTI_ISA_DEF(GSRenderer* makeGSRendererSW(int threads);)

View File

@ -17,8 +17,6 @@
#include "GSRendererHW.h"
#include "GSTextureReplacements.h"
#include "GS/GSGL.h"
#include "GS/Renderers/SW/GSTextureCacheSW.h"
#include "GS/Renderers/SW/GSDrawScanline.h"
#include "Host.h"
#include "common/Align.h"
#include "common/StringUtil.h"
@ -35,6 +33,7 @@ GSRendererHW::GSRendererHW()
, m_userhacks_tcoffset_y(0)
, m_lod(GSVector2i(0, 0))
{
MULTI_ISA_SELECT(GSRendererHWPopulateFunctions)(*this);
m_mipmap = (GSConfig.HWMipmap >= HWMipmapLevel::Basic);
SetTCOffset();
@ -1403,7 +1402,7 @@ void GSRendererHW::Draw()
const bool single_page = (delta_p.x <= 64.0f) && (delta_p.y <= 64.0f);
// We trigger the sw prim render here super early, to avoid creating superfluous render targets.
if (CanUseSwPrimRender(no_rt, no_ds, draw_sprite_tex) && SwPrimRender())
if (CanUseSwPrimRender(no_rt, no_ds, draw_sprite_tex) && SwPrimRender(*this))
{
GL_CACHE("Possible texture decompression, drawn with SwPrimRender()");
return;
@ -1416,7 +1415,7 @@ void GSRendererHW::Draw()
m_mem.m_clut.ClearDrawInvalidity();
if (result)
{
if (SwPrimRender())
if (SwPrimRender(*this))
{
GL_CACHE("Possible clut draw, drawn with SwPrimRender()");
return;
@ -4118,423 +4117,6 @@ bool GSRendererHW::CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_t
return true;
}
bool GSRendererHW::SwPrimRender()
{
const GSDrawingContext* context = m_context;
const GSDrawingEnvironment& env = m_env;
const GS_PRIM_CLASS primclass = m_vt.m_primclass;
GSDrawScanline::SharedData data;
GSScanlineGlobalData& gd = data.global;
u32 clut_storage[256] = {0};
GSVector4i dimx_storage[8];
m_sw_vertex_buffer.resize(((m_vertex.next + 1) & ~1));
data.primclass = m_vt.m_primclass;
data.buff = nullptr;
data.vertex = m_sw_vertex_buffer.data();
data.vertex_count = m_vertex.next;
data.index = m_index.buff;
data.index_count = m_index.tail;
data.scanmsk_value = m_env.SCANMSK.MSK;
// Skip per pixel division if q is constant.
// Optimize the division by 1 with a nop. It also means that GS_SPRITE_CLASS must be processed when !m_vt.m_eq.q.
// If you have both GS_SPRITE_CLASS && m_vt.m_eq.q, it will depends on the first part of the 'OR'.
const u32 q_div = ((m_vt.m_eq.q && m_vt.m_min.t.z != 1.0f) || (!m_vt.m_eq.q && m_vt.m_primclass == GS_SPRITE_CLASS));
GSVertexSW::s_cvb[m_vt.m_primclass][PRIM->TME][PRIM->FST][q_div](m_context, data.vertex, m_vertex.buff, m_vertex.next);
GSVector4i scissor = GSVector4i(m_context->scissor.in);
GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil()));
// Points and lines may have zero area bbox (single line: 0, 0 - 256, 0)
if (m_vt.m_primclass == GS_POINT_CLASS || m_vt.m_primclass == GS_LINE_CLASS)
{
if (bbox.x == bbox.z)
bbox.z++;
if (bbox.y == bbox.w)
bbox.w++;
}
data.scissor = scissor;
data.bbox = bbox;
data.frame = g_perfmon.GetFrame();
gd.vm = m_mem.m_vm8;
gd.fbo = context->offset.fb;
gd.zbo = context->offset.zb;
gd.fzbr = context->offset.fzb4->row;
gd.fzbc = context->offset.fzb4->col;
gd.sel.key = 0;
gd.sel.fpsm = 3;
gd.sel.zpsm = 3;
gd.sel.atst = ATST_ALWAYS;
gd.sel.tfx = TFX_NONE;
gd.sel.ababcd = 0xff;
gd.sel.prim = primclass;
u32 fm = context->FRAME.FBMSK;
u32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
const u32 fm_mask = GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk;
// When the format is 24bit (Z or C), DATE ceases to function.
// It was believed that in 24bit mode all pixels pass because alpha doesn't exist
// however after testing this on a PS2 it turns out nothing passes, it ignores the draw.
if ((m_context->FRAME.PSM & 0xF) == PSM_PSMCT24 && m_context->TEST.DATE)
{
//DevCon.Warning("DATE on a 24bit format, Frame PSM %x", m_context->FRAME.PSM);
return false;
}
if (context->TEST.ZTE && context->TEST.ZTST == ZTST_NEVER)
{
fm = 0xffffffff;
zm = 0xffffffff;
}
if (PRIM->TME)
{
if (GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
{
m_mem.m_clut.Read32(context->TEX0, env.TEXA);
}
}
if (context->TEST.ATE)
{
if (!TryAlphaTest(fm, fm_mask, zm))
{
gd.sel.atst = context->TEST.ATST;
gd.sel.afail = context->TEST.AFAIL;
gd.aref = GSVector4i((int)context->TEST.AREF);
switch (gd.sel.atst)
{
case ATST_LESS:
gd.sel.atst = ATST_LEQUAL;
gd.aref -= GSVector4i::x00000001();
break;
case ATST_GREATER:
gd.sel.atst = ATST_GEQUAL;
gd.aref += GSVector4i::x00000001();
break;
}
}
}
const bool fwrite = (fm & fm_mask) != fm_mask;
const bool ftest = gd.sel.atst != ATST_ALWAYS || context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;
const bool zwrite = zm != 0xffffffff;
const bool ztest = context->TEST.ZTE && context->TEST.ZTST > ZTST_ALWAYS;
if (!fwrite && !zwrite)
return false;
gd.sel.fwrite = fwrite;
gd.sel.ftest = ftest;
if (fwrite || ftest)
{
gd.sel.fpsm = GSLocalMemory::m_psm[context->FRAME.PSM].fmt;
if ((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt.m_eq.rgba != 0xffff)
{
gd.sel.iip = PRIM->IIP;
}
if (PRIM->TME)
{
gd.sel.tfx = context->TEX0.TFX;
gd.sel.tcc = context->TEX0.TCC;
gd.sel.fst = PRIM->FST;
gd.sel.ltf = m_vt.IsLinear();
if (GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
{
gd.sel.tlu = 1;
gd.clut = clut_storage; // FIXME: might address uninitialized data of the texture (0xCD) that is not in 0-15 range for 4-bpp formats
memcpy(gd.clut, (const u32*)m_mem.m_clut, sizeof(u32) * GSLocalMemory::m_psm[context->TEX0.PSM].pal);
}
gd.sel.wms = context->CLAMP.WMS;
gd.sel.wmt = context->CLAMP.WMT;
if (gd.sel.tfx == TFX_MODULATE && gd.sel.tcc && m_vt.m_eq.rgba == 0xffff && m_vt.m_min.c.eq(GSVector4i(128)))
{
// modulate does not do anything when vertex color is 0x80
gd.sel.tfx = TFX_DECAL;
}
GIFRegTEX0 TEX0 = m_context->GetSizeFixedTEX0(m_vt.m_min.t.xyxy(m_vt.m_max.t), m_vt.IsLinear(), false);
const GSVector4i r = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf).coverage;
if (!m_sw_texture)
m_sw_texture = std::make_unique<GSTextureCacheSW::Texture>(0, TEX0, env.TEXA);
else
m_sw_texture->Reset(0, TEX0, env.TEXA);
m_sw_texture->Update(r);
gd.tex[0] = m_sw_texture->m_buff;
gd.sel.tw = m_sw_texture->m_tw - 3;
{
// skip per pixel division if q is constant. Sprite uses flat
// q, so it's always constant by primitive.
// Note: the 'q' division was done in GSRendererSW::ConvertVertexBuffer
gd.sel.fst |= (m_vt.m_eq.q || primclass == GS_SPRITE_CLASS);
if (gd.sel.ltf && gd.sel.fst)
{
// if q is constant we can do the half pel shift for bilinear sampling on the vertices
// TODO: but not when mipmapping is used!!!
GSVertexSW* RESTRICT v = data.vertex;
const GSVector4 half(0x8000, 0x8000);
for (int i = 0, j = data.vertex_count; i < j; i++)
{
const GSVector4 t = v[i].t;
v[i].t = (t - half).xyzw(t);
}
}
}
u16 tw = 1u << TEX0.TW;
u16 th = 1u << TEX0.TH;
if (tw > 1024)
tw = 1;
if (th > 1024)
th = 1;
switch (context->CLAMP.WMS)
{
case CLAMP_REPEAT:
gd.t.min.U16[0] = gd.t.minmax.U16[0] = tw - 1;
gd.t.max.U16[0] = gd.t.minmax.U16[2] = 0;
gd.t.mask.U32[0] = 0xffffffff;
break;
case CLAMP_CLAMP:
gd.t.min.U16[0] = gd.t.minmax.U16[0] = 0;
gd.t.max.U16[0] = gd.t.minmax.U16[2] = tw - 1;
gd.t.mask.U32[0] = 0;
break;
case CLAMP_REGION_CLAMP:
// REGION_CLAMP ignores the actual texture size
gd.t.min.U16[0] = gd.t.minmax.U16[0] = context->CLAMP.MINU;
gd.t.max.U16[0] = gd.t.minmax.U16[2] = context->CLAMP.MAXU;
gd.t.mask.U32[0] = 0;
break;
case CLAMP_REGION_REPEAT:
// MINU is restricted to MINU or texture size, whichever is smaller, MAXU is an offset in the texture.
gd.t.min.U16[0] = gd.t.minmax.U16[0] = context->CLAMP.MINU & (tw - 1);
gd.t.max.U16[0] = gd.t.minmax.U16[2] = context->CLAMP.MAXU;
gd.t.mask.U32[0] = 0xffffffff;
break;
default:
__assume(0);
}
switch (context->CLAMP.WMT)
{
case CLAMP_REPEAT:
gd.t.min.U16[4] = gd.t.minmax.U16[1] = th - 1;
gd.t.max.U16[4] = gd.t.minmax.U16[3] = 0;
gd.t.mask.U32[2] = 0xffffffff;
break;
case CLAMP_CLAMP:
gd.t.min.U16[4] = gd.t.minmax.U16[1] = 0;
gd.t.max.U16[4] = gd.t.minmax.U16[3] = th - 1;
gd.t.mask.U32[2] = 0;
break;
case CLAMP_REGION_CLAMP:
// REGION_CLAMP ignores the actual texture size
gd.t.min.U16[4] = gd.t.minmax.U16[1] = context->CLAMP.MINV;
gd.t.max.U16[4] = gd.t.minmax.U16[3] = context->CLAMP.MAXV; // ffx anima summon scene, when the anchor appears (th = 256, maxv > 256)
gd.t.mask.U32[2] = 0;
break;
case CLAMP_REGION_REPEAT:
// MINV is restricted to MINV or texture size, whichever is smaller, MAXV is an offset in the texture.
gd.t.min.U16[4] = gd.t.minmax.U16[1] = context->CLAMP.MINV & (th - 1); // skygunner main menu water texture 64x64, MINV = 127
gd.t.max.U16[4] = gd.t.minmax.U16[3] = context->CLAMP.MAXV;
gd.t.mask.U32[2] = 0xffffffff;
break;
default:
__assume(0);
}
gd.t.min = gd.t.min.xxxxlh();
gd.t.max = gd.t.max.xxxxlh();
gd.t.mask = gd.t.mask.xxzz();
gd.t.invmask = ~gd.t.mask;
}
if (PRIM->FGE)
{
gd.sel.fge = 1;
gd.frb = env.FOGCOL.U32[0] & 0x00ff00ff;
gd.fga = (env.FOGCOL.U32[0] >> 8) & 0x00ff00ff;
}
if (context->FRAME.PSM != PSM_PSMCT24)
{
gd.sel.date = context->TEST.DATE;
gd.sel.datm = context->TEST.DATM;
}
if (!IsOpaque())
{
gd.sel.abe = PRIM->ABE;
gd.sel.ababcd = context->ALPHA.U32[0];
if (env.PABE.PABE)
{
gd.sel.pabe = 1;
}
if (PRIM->AA1 && (primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS))
{
gd.sel.aa1 = 1;
}
gd.afix = GSVector4i((int)context->ALPHA.FIX << 7).xxzzlh();
}
const u32 masked_fm = fm & fm_mask;
if (gd.sel.date
|| gd.sel.aba == 1 || gd.sel.abb == 1 || gd.sel.abc == 1 || gd.sel.abd == 1
|| gd.sel.atst != ATST_ALWAYS && gd.sel.afail == AFAIL_RGB_ONLY
|| gd.sel.fpsm == 0 && masked_fm != 0 && masked_fm != fm_mask
|| gd.sel.fpsm == 1 && masked_fm != 0 && masked_fm != fm_mask
|| gd.sel.fpsm == 2 && masked_fm != 0 && masked_fm != fm_mask)
{
gd.sel.rfb = 1;
}
gd.sel.colclamp = env.COLCLAMP.CLAMP;
gd.sel.fba = context->FBA.FBA;
if (env.DTHE.DTHE)
{
gd.sel.dthe = 1;
gd.dimx = dimx_storage;
memcpy(gd.dimx, env.dimx, sizeof(env.dimx));
}
}
gd.sel.zwrite = zwrite;
gd.sel.ztest = ztest;
if (zwrite || ztest)
{
const u32 z_max = 0xffffffff >> (GSLocalMemory::m_psm[context->ZBUF.PSM].fmt * 8);
gd.sel.zpsm = GSLocalMemory::m_psm[context->ZBUF.PSM].fmt;
gd.sel.ztst = ztest ? context->TEST.ZTST : (int)ZTST_ALWAYS;
gd.sel.zequal = !!m_vt.m_eq.z;
gd.sel.zoverflow = (u32)GSVector4i(m_vt.m_max.p).z == 0x80000000U;
gd.sel.zclamp = (u32)GSVector4i(m_vt.m_max.p).z > z_max;
}
#if _M_SSE >= 0x501
gd.fm = fm;
gd.zm = zm;
if (gd.sel.fpsm == 1)
{
gd.fm |= 0xff000000;
}
else if (gd.sel.fpsm == 2)
{
u32 rb = gd.fm & 0x00f800f8;
u32 ga = gd.fm & 0x8000f800;
gd.fm = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3) | 0xffff0000;
}
if (gd.sel.zpsm == 1)
{
gd.zm |= 0xff000000;
}
else if (gd.sel.zpsm == 2)
{
gd.zm |= 0xffff0000;
}
#else
gd.fm = GSVector4i(fm);
gd.zm = GSVector4i(zm);
if (gd.sel.fpsm == 1)
{
gd.fm |= GSVector4i::xff000000();
}
else if (gd.sel.fpsm == 2)
{
GSVector4i rb = gd.fm & 0x00f800f8;
GSVector4i ga = gd.fm & 0x8000f800;
gd.fm = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3) | GSVector4i::xffff0000();
}
if (gd.sel.zpsm == 1)
{
gd.zm |= GSVector4i::xff000000();
}
else if (gd.sel.zpsm == 2)
{
gd.zm |= GSVector4i::xffff0000();
}
#endif
if (gd.sel.prim == GS_SPRITE_CLASS && !gd.sel.ftest && !gd.sel.ztest && data.bbox.eq(data.bbox.rintersect(data.scissor))) // TODO: check scissor horizontally only
{
gd.sel.notest = 1;
const u32 ofx = context->XYOFFSET.OFX;
for (int i = 0, j = m_vertex.tail; i < j; i++)
{
#if _M_SSE >= 0x501
if ((((m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 7) // aligned to 8
#else
if ((((m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 3) // aligned to 4
#endif
{
gd.sel.notest = 0;
break;
}
}
}
if (!m_sw_rasterizer)
m_sw_rasterizer = std::make_unique<GSRasterizer>(new GSDrawScanline(), 0, 1);
m_sw_rasterizer->Draw(&data);
m_tc->InvalidateVideoMem(context->offset.fb, bbox);
return true;
}
// hacks
GSRendererHW::Hacks::Hacks()

View File

@ -20,11 +20,15 @@
#include "GS/Renderers/Common/GSRenderer.h"
#include "GS/Renderers/SW/GSTextureCacheSW.h"
#include "GS/GSState.h"
#include "GS/MultiISA.h"
class GSRasterizer;
class GSRendererHW;
MULTI_ISA_DEF(class GSRendererHWFunctions;)
MULTI_ISA_DEF(void GSRendererHWPopulateFunctions(GSRendererHW& renderer);)
class GSRendererHW : public GSRenderer
{
MULTI_ISA_FRIEND(GSRendererHWFunctions);
public:
static constexpr int MAX_FRAMEBUFFER_HEIGHT = 1280;
@ -130,7 +134,7 @@ private:
bool PossibleCLUTDraw();
bool PossibleCLUTDrawAggressive();
bool CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_tex);
bool SwPrimRender();
bool (*SwPrimRender)(GSRendererHW&);
template <bool linear>
void RoundSpriteOffset();
@ -166,7 +170,7 @@ private:
// software sprite renderer state
std::vector<GSVertexSW> m_sw_vertex_buffer;
std::unique_ptr<GSTextureCacheSW::Texture> m_sw_texture;
std::unique_ptr<GSRasterizer> m_sw_rasterizer;
std::unique_ptr<GSVirtualAlignedClass<32>> m_sw_rasterizer;
public:
GSRendererHW();

View File

@ -0,0 +1,461 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2022 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include "GSRendererHW.h"
#include "GS/Renderers/SW/GSTextureCacheSW.h"
#include "GS/Renderers/SW/GSDrawScanline.h"
/// Holder for the GSRendererHW routines that are compiled once per ISA.
/// All members are static; the class only groups the functions under one name.
class CURRENT_ISA::GSRendererHWFunctions
{
public:
	/// Renders the primitives currently queued on `hw` with the software rasterizer.
	/// Returns true when the draw was performed, false when it was skipped.
	static bool SwPrimRender(GSRendererHW& hw);

	/// Points the renderer's SwPrimRender function pointer at this ISA's implementation.
	static void Populate(GSRendererHW& renderer)
	{
		renderer.SwPrimRender = &GSRendererHWFunctions::SwPrimRender;
	}
};
MULTI_ISA_UNSHARED_IMPL;
// Per-ISA entry point: fills in the function pointers of `renderer` with the
// implementations compiled for this ISA by forwarding to
// GSRendererHWFunctions::Populate.
void CURRENT_ISA::GSRendererHWPopulateFunctions(GSRendererHW& renderer)
{
GSRendererHWFunctions::Populate(renderer);
}
/// Draws the primitives currently queued on `hw` with the software rasterizer,
/// writing straight into GS local memory, then invalidates the touched
/// framebuffer area in the hardware texture cache.
///
/// @param hw  Hardware renderer whose vertex/index buffers, vertex trace,
///            drawing context and environment describe the draw.
/// @return true if the draw was rasterized; false if it was skipped
///         (DATE requested on a 24-bit frame format, or neither the frame
///         buffer nor the Z buffer would be written).
bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw)
{
	GSVertexTrace& vt = hw.m_vt;
	const GIFRegPRIM* PRIM = hw.PRIM;
	const GSDrawingContext* context = hw.m_context;
	const GSDrawingEnvironment& env = hw.m_env;
	const GS_PRIM_CLASS primclass = vt.m_primclass;

	GSDrawScanline::SharedData data;
	GSScanlineGlobalData& gd = data.global;

	// Backing storage for gd.clut / gd.dimx; it only has to outlive the Draw() call below.
	// clut_storage is zero-initialized because only the first `pal` entries are filled in
	// further down, while texel indices are not guaranteed to stay inside that range.
	u32 clut_storage[256] = {};
	GSVector4i dimx_storage[8];

	// Vertex count rounded up to an even number of elements.
	hw.m_sw_vertex_buffer.resize(((hw.m_vertex.next + 1) & ~1));

	data.primclass = vt.m_primclass;
	data.buff = nullptr;
	data.vertex = hw.m_sw_vertex_buffer.data();
	data.vertex_count = hw.m_vertex.next;
	data.index = hw.m_index.buff;
	data.index_count = hw.m_index.tail;
	data.scanmsk_value = hw.m_env.SCANMSK.MSK;

	// Skip per pixel division if q is constant.
	// Optimize the division by 1 with a nop. It also means that GS_SPRITE_CLASS must be processed when !vt.m_eq.q.
	// If you have both GS_SPRITE_CLASS && vt.m_eq.q, it will depend on the first part of the 'OR'.
	const u32 q_div = ((vt.m_eq.q && vt.m_min.t.z != 1.0f) || (!vt.m_eq.q && vt.m_primclass == GS_SPRITE_CLASS));
	GSVertexSW::s_cvb[vt.m_primclass][PRIM->TME][PRIM->FST][q_div](context, data.vertex, hw.m_vertex.buff, hw.m_vertex.next);

	GSVector4i scissor = GSVector4i(context->scissor.in);
	GSVector4i bbox = GSVector4i(vt.m_min.p.floor().xyxy(vt.m_max.p.ceil()));

	// Points and lines may have zero area bbox (single line: 0, 0 - 256, 0)
	if (vt.m_primclass == GS_POINT_CLASS || vt.m_primclass == GS_LINE_CLASS)
	{
		if (bbox.x == bbox.z)
			bbox.z++;
		if (bbox.y == bbox.w)
			bbox.w++;
	}

	data.scissor = scissor;
	data.bbox = bbox;
	data.frame = g_perfmon.GetFrame();

	gd.vm = hw.m_mem.m_vm8;

	gd.fbo = context->offset.fb;
	gd.zbo = context->offset.zb;
	gd.fzbr = context->offset.fzb4->row;
	gd.fzbc = context->offset.fzb4->col;

	// Start from a cleared selector and set the defaults used when a stage is disabled.
	gd.sel.key = 0;
	gd.sel.fpsm = 3;
	gd.sel.zpsm = 3;
	gd.sel.atst = ATST_ALWAYS;
	gd.sel.tfx = TFX_NONE;
	gd.sel.ababcd = 0xff;
	gd.sel.prim = primclass;

	u32 fm = context->FRAME.FBMSK;
	u32 zm = (context->ZBUF.ZMSK || context->TEST.ZTE == 0) ? 0xffffffff : 0;
	const u32 fm_mask = GSLocalMemory::m_psm[context->FRAME.PSM].fmsk;

	// When the format is 24bit (Z or C), DATE ceases to function.
	// It was believed that in 24bit mode all pixels pass because alpha doesn't exist
	// however after testing this on a PS2 it turns out nothing passes, it ignores the draw.
	if ((context->FRAME.PSM & 0xF) == PSM_PSMCT24 && context->TEST.DATE)
	{
		//DevCon.Warning("DATE on a 24bit format, Frame PSM %x", context->FRAME.PSM);
		return false;
	}

	// A depth test that never passes masks out both frame and Z writes.
	if (context->TEST.ZTE && context->TEST.ZTST == ZTST_NEVER)
	{
		fm = 0xffffffff;
		zm = 0xffffffff;
	}

	if (PRIM->TME)
	{
		if (GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
		{
			hw.m_mem.m_clut.Read32(context->TEX0, env.TEXA);
		}
	}

	if (context->TEST.ATE)
	{
		if (!hw.TryAlphaTest(fm, fm_mask, zm))
		{
			gd.sel.atst = context->TEST.ATST;
			gd.sel.afail = context->TEST.AFAIL;

			gd.aref = GSVector4i((int)context->TEST.AREF);

			// Rewrite the strict comparisons as their inclusive counterparts by shifting the reference by one.
			switch (gd.sel.atst)
			{
				case ATST_LESS:
					gd.sel.atst = ATST_LEQUAL;
					gd.aref -= GSVector4i::x00000001();
					break;
				case ATST_GREATER:
					gd.sel.atst = ATST_GEQUAL;
					gd.aref += GSVector4i::x00000001();
					break;
			}
		}
	}

	const bool fwrite = (fm & fm_mask) != fm_mask;
	const bool ftest = gd.sel.atst != ATST_ALWAYS || (context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24);

	const bool zwrite = zm != 0xffffffff;
	const bool ztest = context->TEST.ZTE && context->TEST.ZTST > ZTST_ALWAYS;

	// Nothing would be written to either buffer: skip the draw.
	if (!fwrite && !zwrite)
		return false;

	gd.sel.fwrite = fwrite;
	gd.sel.ftest = ftest;

	if (fwrite || ftest)
	{
		gd.sel.fpsm = GSLocalMemory::m_psm[context->FRAME.PSM].fmt;

		if ((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && vt.m_eq.rgba != 0xffff)
		{
			gd.sel.iip = PRIM->IIP;
		}

		if (PRIM->TME)
		{
			gd.sel.tfx = context->TEX0.TFX;
			gd.sel.tcc = context->TEX0.TCC;
			gd.sel.fst = PRIM->FST;
			gd.sel.ltf = vt.IsLinear();

			if (GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
			{
				gd.sel.tlu = 1;

				// Only `pal` entries are copied. Texture data may contain indices outside that range
				// (e.g. uninitialized 0xCD bytes with a 4-bpp format); those entries read as zero
				// because clut_storage is zero-initialized above.
				gd.clut = clut_storage;
				memcpy(gd.clut, (const u32*)hw.m_mem.m_clut, sizeof(u32) * GSLocalMemory::m_psm[context->TEX0.PSM].pal);
			}

			gd.sel.wms = context->CLAMP.WMS;
			gd.sel.wmt = context->CLAMP.WMT;

			if (gd.sel.tfx == TFX_MODULATE && gd.sel.tcc && vt.m_eq.rgba == 0xffff && vt.m_min.c.eq(GSVector4i(128)))
			{
				// modulate does not do anything when vertex color is 0x80
				gd.sel.tfx = TFX_DECAL;
			}

			GIFRegTEX0 TEX0 = context->GetSizeFixedTEX0(vt.m_min.t.xyxy(vt.m_max.t), vt.IsLinear(), false);

			const GSVector4i r = hw.GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf).coverage;

			// The renderer keeps a single software texture that is reused between calls.
			if (!hw.m_sw_texture)
				hw.m_sw_texture = std::make_unique<GSTextureCacheSW::Texture>(0, TEX0, env.TEXA);
			else
				hw.m_sw_texture->Reset(0, TEX0, env.TEXA);

			hw.m_sw_texture->Update(r);
			gd.tex[0] = hw.m_sw_texture->m_buff;

			gd.sel.tw = hw.m_sw_texture->m_tw - 3;

			{
				// skip per pixel division if q is constant. Sprite uses flat
				// q, so it's always constant by primitive.
				// Note: the 'q' division was done in GSRendererSW::ConvertVertexBuffer
				gd.sel.fst |= (vt.m_eq.q || primclass == GS_SPRITE_CLASS);

				if (gd.sel.ltf && gd.sel.fst)
				{
					// if q is constant we can do the half pel shift for bilinear sampling on the vertices

					// TODO: but not when mipmapping is used!!!

					const GSVector4 half(0x8000, 0x8000);

					GSVertexSW* RESTRICT v = data.vertex;

					for (int i = 0, j = data.vertex_count; i < j; i++)
					{
						const GSVector4 t = v[i].t;

						v[i].t = (t - half).xyzw(t);
					}
				}
			}

			u16 tw = 1u << TEX0.TW;
			u16 th = 1u << TEX0.TH;

			// Sizes above 1024 are replaced by 1.
			if (tw > 1024)
				tw = 1;

			if (th > 1024)
				th = 1;

			switch (context->CLAMP.WMS)
			{
				case CLAMP_REPEAT:
					gd.t.min.U16[0] = gd.t.minmax.U16[0] = tw - 1;
					gd.t.max.U16[0] = gd.t.minmax.U16[2] = 0;
					gd.t.mask.U32[0] = 0xffffffff;
					break;
				case CLAMP_CLAMP:
					gd.t.min.U16[0] = gd.t.minmax.U16[0] = 0;
					gd.t.max.U16[0] = gd.t.minmax.U16[2] = tw - 1;
					gd.t.mask.U32[0] = 0;
					break;
				case CLAMP_REGION_CLAMP:
					// REGION_CLAMP ignores the actual texture size
					gd.t.min.U16[0] = gd.t.minmax.U16[0] = context->CLAMP.MINU;
					gd.t.max.U16[0] = gd.t.minmax.U16[2] = context->CLAMP.MAXU;
					gd.t.mask.U32[0] = 0;
					break;
				case CLAMP_REGION_REPEAT:
					// MINU is restricted to MINU or texture size, whichever is smaller, MAXU is an offset in the texture.
					gd.t.min.U16[0] = gd.t.minmax.U16[0] = context->CLAMP.MINU & (tw - 1);
					gd.t.max.U16[0] = gd.t.minmax.U16[2] = context->CLAMP.MAXU;
					gd.t.mask.U32[0] = 0xffffffff;
					break;
				default:
					__assume(0);
			}

			switch (context->CLAMP.WMT)
			{
				case CLAMP_REPEAT:
					gd.t.min.U16[4] = gd.t.minmax.U16[1] = th - 1;
					gd.t.max.U16[4] = gd.t.minmax.U16[3] = 0;
					gd.t.mask.U32[2] = 0xffffffff;
					break;
				case CLAMP_CLAMP:
					gd.t.min.U16[4] = gd.t.minmax.U16[1] = 0;
					gd.t.max.U16[4] = gd.t.minmax.U16[3] = th - 1;
					gd.t.mask.U32[2] = 0;
					break;
				case CLAMP_REGION_CLAMP:
					// REGION_CLAMP ignores the actual texture size
					gd.t.min.U16[4] = gd.t.minmax.U16[1] = context->CLAMP.MINV;
					gd.t.max.U16[4] = gd.t.minmax.U16[3] = context->CLAMP.MAXV; // ffx anima summon scene, when the anchor appears (th = 256, maxv > 256)
					gd.t.mask.U32[2] = 0;
					break;
				case CLAMP_REGION_REPEAT:
					// MINV is restricted to MINV or texture size, whichever is smaller, MAXV is an offset in the texture.
					gd.t.min.U16[4] = gd.t.minmax.U16[1] = context->CLAMP.MINV & (th - 1); // skygunner main menu water texture 64x64, MINV = 127
					gd.t.max.U16[4] = gd.t.minmax.U16[3] = context->CLAMP.MAXV;
					gd.t.mask.U32[2] = 0xffffffff;
					break;
				default:
					__assume(0);
			}

			gd.t.min = gd.t.min.xxxxlh();
			gd.t.max = gd.t.max.xxxxlh();
			gd.t.mask = gd.t.mask.xxzz();
			gd.t.invmask = ~gd.t.mask;
		}

		if (PRIM->FGE)
		{
			gd.sel.fge = 1;

			gd.frb = env.FOGCOL.U32[0] & 0x00ff00ff;
			gd.fga = (env.FOGCOL.U32[0] >> 8) & 0x00ff00ff;
		}

		if (context->FRAME.PSM != PSM_PSMCT24)
		{
			gd.sel.date = context->TEST.DATE;
			gd.sel.datm = context->TEST.DATM;
		}

		if (!hw.IsOpaque())
		{
			gd.sel.abe = PRIM->ABE;
			gd.sel.ababcd = context->ALPHA.U32[0];

			if (env.PABE.PABE)
			{
				gd.sel.pabe = 1;
			}

			if (PRIM->AA1 && (primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS))
			{
				gd.sel.aa1 = 1;
			}

			gd.afix = GSVector4i((int)context->ALPHA.FIX << 7).xxzzlh();
		}

		const u32 masked_fm = fm & fm_mask;

		// Set rfb when any of the enabled stages below is in use, or when the frame mask is only partial.
		if (gd.sel.date
			|| gd.sel.aba == 1 || gd.sel.abb == 1 || gd.sel.abc == 1 || gd.sel.abd == 1
			|| (gd.sel.atst != ATST_ALWAYS && gd.sel.afail == AFAIL_RGB_ONLY)
			|| (gd.sel.fpsm == 0 && masked_fm != 0 && masked_fm != fm_mask)
			|| (gd.sel.fpsm == 1 && masked_fm != 0 && masked_fm != fm_mask)
			|| (gd.sel.fpsm == 2 && masked_fm != 0 && masked_fm != fm_mask))
		{
			gd.sel.rfb = 1;
		}

		gd.sel.colclamp = env.COLCLAMP.CLAMP;
		gd.sel.fba = context->FBA.FBA;

		if (env.DTHE.DTHE)
		{
			gd.sel.dthe = 1;

			gd.dimx = dimx_storage;

			memcpy(gd.dimx, env.dimx, sizeof(env.dimx));
		}
	}

	gd.sel.zwrite = zwrite;
	gd.sel.ztest = ztest;

	if (zwrite || ztest)
	{
		const u32 z_max = 0xffffffff >> (GSLocalMemory::m_psm[context->ZBUF.PSM].fmt * 8);

		gd.sel.zpsm = GSLocalMemory::m_psm[context->ZBUF.PSM].fmt;
		gd.sel.ztst = ztest ? context->TEST.ZTST : (int)ZTST_ALWAYS;
		gd.sel.zequal = !!vt.m_eq.z;
		gd.sel.zoverflow = (u32)GSVector4i(vt.m_max.p).z == 0x80000000U;
		gd.sel.zclamp = (u32)GSVector4i(vt.m_max.p).z > z_max;
	}

	// The two branches below apply the same mask adjustments; gd.fm / gd.zm are scalars in the
	// first branch and GSVector4i values in the second.
#if _M_SSE >= 0x501

	gd.fm = fm;
	gd.zm = zm;

	if (gd.sel.fpsm == 1)
	{
		gd.fm |= 0xff000000;
	}
	else if (gd.sel.fpsm == 2)
	{
		u32 rb = gd.fm & 0x00f800f8;
		u32 ga = gd.fm & 0x8000f800;

		gd.fm = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3) | 0xffff0000;
	}

	if (gd.sel.zpsm == 1)
	{
		gd.zm |= 0xff000000;
	}
	else if (gd.sel.zpsm == 2)
	{
		gd.zm |= 0xffff0000;
	}

#else

	gd.fm = GSVector4i(fm);
	gd.zm = GSVector4i(zm);

	if (gd.sel.fpsm == 1)
	{
		gd.fm |= GSVector4i::xff000000();
	}
	else if (gd.sel.fpsm == 2)
	{
		GSVector4i rb = gd.fm & 0x00f800f8;
		GSVector4i ga = gd.fm & 0x8000f800;

		gd.fm = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3) | GSVector4i::xffff0000();
	}

	if (gd.sel.zpsm == 1)
	{
		gd.zm |= GSVector4i::xff000000();
	}
	else if (gd.sel.zpsm == 2)
	{
		gd.zm |= GSVector4i::xffff0000();
	}

#endif

	if (gd.sel.prim == GS_SPRITE_CLASS && !gd.sel.ftest && !gd.sel.ztest && data.bbox.eq(data.bbox.rintersect(data.scissor))) // TODO: check scissor horizontally only
	{
		gd.sel.notest = 1;

		const u32 ofx = context->XYOFFSET.OFX;

		// notest is withdrawn as soon as one vertex X is not aligned to the required pixel multiple.
		for (int i = 0, j = hw.m_vertex.tail; i < j; i++)
		{
#if _M_SSE >= 0x501
			if ((((hw.m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 7) // aligned to 8
#else
			if ((((hw.m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 3) // aligned to 4
#endif
			{
				gd.sel.notest = 0;

				break;
			}
		}
	}

	// The rasterizer is created on first use and kept on the renderer afterwards.
	if (!hw.m_sw_rasterizer)
		hw.m_sw_rasterizer = std::make_unique<GSRasterizer>(new GSDrawScanline(), 0, 1);

	// m_sw_rasterizer is stored as a GSVirtualAlignedClass<32> pointer; it is only ever assigned
	// a GSRasterizer (just above), so the downcast is valid.
	static_cast<GSRasterizer*>(hw.m_sw_rasterizer.get())->Draw(&data);

	hw.m_tc->InvalidateVideoMem(context->offset.fb, bbox);

	return true;
}

View File

@ -17,8 +17,12 @@
#include "GSDrawScanline.h"
#include "GSTextureCacheSW.h"
#if MULTI_ISA_COMPILE_ONCE
// Lack of a better home
constexpr GSScanlineConstantData g_const;
#endif
MULTI_ISA_UNSHARED_IMPL;
GSDrawScanline::GSDrawScanline()
: m_sp_map("GSSetupPrim", &m_local)

View File

@ -21,6 +21,8 @@
#include "GSSetupPrimCodeGenerator.h"
#include "GSDrawScanlineCodeGenerator.h"
MULTI_ISA_UNSHARED_START
class GSDrawScanline : public IDrawScanline
{
public:
@ -85,3 +87,5 @@ public:
m_ds_map.PrintStats();
}
};
MULTI_ISA_UNSHARED_END

View File

@ -18,6 +18,7 @@
#include "GS/Renderers/Common/GSFunctionMap.h"
#include "GSVertexSW.h"
MULTI_ISA_UNSHARED_IMPL;
using namespace Xbyak;
// Ease the reading of the code

View File

@ -17,6 +17,7 @@
#include "GSScanlineEnvironment.h"
#include "GSNewCodeGenerator.h"
#include "GS/MultiISA.h"
#undef _t // Conflict with wx, hopefully no one needs this
@ -30,6 +31,8 @@
#define DRAW_SCANLINE_USING_YMM 0
#endif
MULTI_ISA_UNSHARED_START
class GSDrawScanlineCodeGenerator2 : public GSNewCodeGenerator
{
using _parent = GSNewCodeGenerator;
@ -187,3 +190,5 @@ private:
int pixels, int mip_offset);
void ReadTexelImpl(const Xmm& dst, const Xmm& addr, u8 i, bool texInA3, bool preserveDst);
};
MULTI_ISA_UNSHARED_END

View File

@ -21,11 +21,13 @@
#include <map>
#include <mutex>
static std::map<u64, bool> s_use_c_draw_scanline;
static std::mutex s_use_c_draw_scanline_mutex;
MULTI_ISA_UNSHARED_IMPL;
static bool shouldUseCDrawScanline(u64 key)
{
static std::map<u64, bool> s_use_c_draw_scanline;
static std::mutex s_use_c_draw_scanline_mutex;
static const char* const fname = getenv("USE_C_DRAW_SCANLINE");
if (!fname)
return false;

View File

@ -18,6 +18,7 @@
#include "GSScanlineEnvironment.h"
#include "GS/Renderers/Common/GSFunctionMap.h"
#include "GS/GSUtil.h"
#include "GS/MultiISA.h"
#if defined(_M_AMD64) || defined(_WIN64)
#define RegLong Xbyak::Reg64
@ -25,6 +26,8 @@
#define RegLong Xbyak::Reg32
#endif
MULTI_ISA_UNSHARED_START
class GSDrawScanlineCodeGenerator : public GSCodeGenerator
{
void operator=(const GSDrawScanlineCodeGenerator&);
@ -36,3 +39,5 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
public:
GSDrawScanlineCodeGenerator(void* param, u64 key, void* code, size_t maxsize);
};
MULTI_ISA_UNSHARED_END

View File

@ -27,6 +27,8 @@
#define ENABLE_DRAW_STATS 0
MULTI_ISA_UNSHARED_IMPL;
int GSRasterizerData::s_counter = 0;
static int compute_best_thread_height(int threads)

View File

@ -21,6 +21,9 @@
#include "GS/GSPerfMon.h"
#include "GS/GSThread_CXX11.h"
#include "GS/GSRingHeap.h"
#include "GS/MultiISA.h"
MULTI_ISA_UNSHARED_START
class alignas(32) GSRasterizerData : public GSAlignedClass<32>
{
@ -113,7 +116,7 @@ public:
__forceinline bool IsSolidRect() const { return m_dr != NULL; }
};
class IRasterizer : public GSAlignedClass<32>
class IRasterizer : public GSVirtualAlignedClass<32>
{
public:
virtual ~IRasterizer() {}
@ -234,3 +237,5 @@ public:
int GetPixels(bool reset);
void PrintStats() {}
};
MULTI_ISA_UNSHARED_END

View File

@ -18,14 +18,18 @@
#include "GS/GSGL.h"
#include "common/StringUtil.h"
MULTI_ISA_UNSHARED_IMPL;
// Per-ISA factory for the software renderer.
// Returns a GSRendererSW allocated with `new`, constructed with `threads`;
// the caller takes ownership of the returned pointer and is responsible for deleting it.
GSRenderer* CURRENT_ISA::makeGSRendererSW(int threads)
{
return new GSRendererSW(threads);
}
#define LOG 0
static FILE* s_fp = LOG ? fopen("c:\\temp1\\_.txt", "w") : NULL;
CONSTINIT const GSVector4 GSVertexSW::m_pos_scale = GSVector4::cxpr(1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
#if _M_SSE >= 0x501
CONSTINIT const GSVector8 GSVertexSW::m_pos_scale2 = GSVector8::cxpr(1.0f / 16, 1.0f / 16, 1.0f, 128.0f, 1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
#endif
static constexpr GSVector4 s_pos_scale = GSVector4::cxpr(1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
GSRendererSW::GSRendererSW(int threads)
: GSRenderer(), m_fzb(NULL)
@ -223,9 +227,20 @@ GSTexture* GSRendererSW::GetFeedbackOutput()
return nullptr;
}
MULTI_ISA_DEF(void GSVertexSWInitStatic();)
// Shared definitions, emitted only when MULTI_ISA_COMPILE_ONCE is set.
#if MULTI_ISA_COMPILE_ONCE
// Table of vertex-buffer conversion functions; it has no initializer here and is
// populated at runtime through InitStatic().
GSVertexSW::ConvertVertexBufferPtr GSVertexSW::s_cvb[4][2][2][2];
// Fills s_cvb by calling the GSVertexSWInitStatic implementation chosen by MULTI_ISA_SELECT.
void GSVertexSW::InitStatic()
{
MULTI_ISA_SELECT(GSVertexSWInitStatic)();
}
#endif
MULTI_ISA_UNSHARED_START
template <u32 primclass, u32 tme, u32 fst, u32 q_div>
void GSVertexSW::ConvertVertexBuffer(GSDrawingContext* RESTRICT ctx, GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count)
void ConvertVertexBuffer(const GSDrawingContext* RESTRICT ctx, GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count)
{
// FIXME q_div wasn't added to AVX2 code path.
@ -274,7 +289,7 @@ void GSVertexSW::ConvertVertexBuffer(GSDrawingContext* RESTRICT ctx, GSVertexSW*
if (primclass == GS_SPRITE_CLASS)
{
dst->p = GSVector4(xy).xyyw(GSVector4(xyzuvf)) * m_pos_scale;
dst->p = GSVector4(xy).xyyw(GSVector4(xyzuvf)) * s_pos_scale;
xyzuvf = xyzuvf.min_u32(z_max);
t = t.insert32<1, 3>(GSVector4::cast(xyzuvf));
@ -282,7 +297,7 @@ void GSVertexSW::ConvertVertexBuffer(GSDrawingContext* RESTRICT ctx, GSVertexSW*
else
{
double z = static_cast<double>(static_cast<u32>(xyzuvf.extract32<1>()));
dst->p = (GSVector4(xy) * m_pos_scale).upld(GSVector4::f64(z, 0.0));
dst->p = (GSVector4(xy) * s_pos_scale).upld(GSVector4::f64(z, 0.0));
t = t.blend32<8>(GSVector4(xyzuvf << 7));
}
@ -296,22 +311,23 @@ void GSVertexSW::ConvertVertexBuffer(GSDrawingContext* RESTRICT ctx, GSVertexSW*
}
}
// clang-format off
GSVertexSW::ConvertVertexBufferPtr GSVertexSW::s_cvb[4][2][2][2] = {
#define InitCVB3(P, T, F) { &GSVertexSW::ConvertVertexBuffer<P, T, F, 0>, &GSVertexSW::ConvertVertexBuffer<P, T, F, 1> }
#define InitCVB2(P, T) { InitCVB3(P, T, 0), InitCVB3(P, T, 1) }
#define InitCVB(P) { InitCVB2(static_cast<u32>(P), 0), InitCVB2(static_cast<u32>(P), 1) }
InitCVB(GS_POINT_CLASS),
InitCVB(GS_LINE_CLASS),
InitCVB(GS_TRIANGLE_CLASS),
InitCVB(GS_SPRITE_CLASS)
#undef InitCVB
// Fills GSVertexSW::s_cvb so that s_cvb[P][T][F][Q] points at ConvertVertexBuffer<P, T, F, Q>.
// The helper macros expand, for each of the four primitive classes listed below, into one
// assignment per 0/1 combination of T, F and Q; all four macros are undefined again afterwards.
// NOTE(review): the `};` and `// clang-format on` lines inside the body look like leftovers
// from the earlier table-initializer form of s_cvb — confirm against the real file.
void GSVertexSWInitStatic()
{
#define InitCVB4(P, T, F, Q) GSVertexSW::s_cvb[P][T][F][Q] = ConvertVertexBuffer<P, T, F, Q>;
#define InitCVB3(P, T, F) InitCVB4(P, T, F, 0) InitCVB4(P, T, F, 1)
#define InitCVB2(P, T) InitCVB3(P, T, 0) InitCVB3(P, T, 1)
#define InitCVB1(P) InitCVB2(P, 0) InitCVB2(P, 1)
InitCVB1(GS_POINT_CLASS)
InitCVB1(GS_LINE_CLASS)
InitCVB1(GS_TRIANGLE_CLASS)
InitCVB1(GS_SPRITE_CLASS)
#undef InitCVB1
#undef InitCVB2
#undef InitCVB3
};
// clang-format on
#undef InitCVB4
}
MULTI_ISA_UNSHARED_END
void GSRendererSW::Draw()
{

View File

@ -18,6 +18,9 @@
#include "GSTextureCacheSW.h"
#include "GSDrawScanline.h"
#include "GS/GSRingHeap.h"
#include "GS/MultiISA.h"
MULTI_ISA_UNSHARED_START
class GSRendererSW final : public GSRenderer
{
@ -95,3 +98,5 @@ public:
void Destroy() override;
};
MULTI_ISA_UNSHARED_END

View File

@ -17,6 +17,7 @@
#include "GSSetupPrimCodeGenerator.all.h"
#include "GSVertexSW.h"
MULTI_ISA_UNSHARED_IMPL;
using namespace Xbyak;
// Builds the memory operand for a GSScanlineLocalData field: when m_rip is set, the field of
// m_local is addressed relative to rip; otherwise it is addressed as _m_local plus the
// field's offset inside GSScanlineLocalData.
#define _rip_local(field) ((m_rip) ? ptr[rip + (char*)&m_local.field] : ptr[_m_local + OFFSETOF(GSScanlineLocalData, field)])

View File

@ -17,6 +17,7 @@
#include "GSScanlineEnvironment.h"
#include "GSNewCodeGenerator.h"
#include "GS/MultiISA.h"
#if _M_SSE >= 0x501
#define SETUP_PRIM_VECTOR_REGISTER Xbyak::Ymm
@ -28,6 +29,8 @@
#define SETUP_PRIM_USING_YMM 0
#endif
MULTI_ISA_UNSHARED_START
class GSSetupPrimCodeGenerator2 : public GSNewCodeGenerator
{
using _parent = GSNewCodeGenerator;
@ -77,3 +80,5 @@ private:
void Texture();
void Color();
};
MULTI_ISA_UNSHARED_END

View File

@ -17,7 +17,7 @@
#include "GSSetupPrimCodeGenerator.h"
#include "GSSetupPrimCodeGenerator.all.h"
using namespace Xbyak;
MULTI_ISA_UNSHARED_IMPL;
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, u64 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)

View File

@ -18,6 +18,9 @@
#include "GSScanlineEnvironment.h"
#include "GS/Renderers/Common/GSFunctionMap.h"
#include "GS/GSUtil.h"
#include "GS/MultiISA.h"
MULTI_ISA_UNSHARED_START
class GSSetupPrimCodeGenerator : public GSCodeGenerator
{
@ -35,3 +38,5 @@ class GSSetupPrimCodeGenerator : public GSCodeGenerator
public:
GSSetupPrimCodeGenerator(void* param, u64 key, void* code, size_t maxsize);
};
MULTI_ISA_UNSHARED_END

View File

@ -246,17 +246,11 @@ struct alignas(32) GSVertexSW
#endif
}
typedef void (*ConvertVertexBufferPtr)(GSDrawingContext* RESTRICT ctx, GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);
typedef void (*ConvertVertexBufferPtr)(const GSDrawingContext* RESTRICT ctx, GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);
static ConvertVertexBufferPtr s_cvb[4][2][2][2];
template <u32 primclass, u32 tme, u32 fst, u32 q_div>
static void ConvertVertexBuffer(GSDrawingContext* RESTRICT ctx, GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);
static const GSVector4 m_pos_scale;
#if _M_SSE >= 0x501
static const GSVector8 m_pos_scale2;
#endif
static void InitStatic();
};
#if _M_SSE >= 0x501

View File

@ -456,6 +456,7 @@
<ClCompile Include="GS\Renderers\SW\GSRasterizer.cpp" />
<ClCompile Include="GS\Renderers\Common\GSRenderer.cpp" />
<ClCompile Include="GS\Renderers\HW\GSRendererHW.cpp" />
<ClCompile Include="GS\Renderers\HW\GSRendererHWMultiISA.cpp" />
<ClCompile Include="GS\Renderers\Null\GSRendererNull.cpp" />
<ClCompile Include="GS\Renderers\SW\GSRendererSW.cpp" />
<ClCompile Include="GS\Window\GSSetting.cpp" />

View File

@ -1607,6 +1607,9 @@
<ClCompile Include="GS\Renderers\HW\GSRendererHW.cpp">
<Filter>System\Ps2\GS\Renderers\Hardware</Filter>
</ClCompile>
<ClCompile Include="GS\Renderers\HW\GSRendererHWMultiISA.cpp">
<Filter>System\Ps2\GS\Renderers\Hardware</Filter>
</ClCompile>
<ClCompile Include="GS\Renderers\HW\GSTextureCache.cpp">
<Filter>System\Ps2\GS\Renderers\Hardware</Filter>
</ClCompile>

View File

@ -309,6 +309,7 @@
<ClCompile Include="GS\Renderers\SW\GSRasterizer.cpp" />
<ClCompile Include="GS\Renderers\Common\GSRenderer.cpp" />
<ClCompile Include="GS\Renderers\HW\GSRendererHW.cpp" />
<ClCompile Include="GS\Renderers\HW\GSRendererHWMultiISA.cpp" />
<ClCompile Include="GS\Renderers\Null\GSRendererNull.cpp" />
<ClCompile Include="GS\Renderers\SW\GSRendererSW.cpp" />
<ClCompile Include="GS\Window\GSSetting.cpp" />

View File

@ -1100,6 +1100,9 @@
<ClCompile Include="GS\Renderers\HW\GSRendererHW.cpp">
<Filter>System\Ps2\GS\Renderers\Hardware</Filter>
</ClCompile>
<ClCompile Include="GS\Renderers\HW\GSRendererHWMultiISA.cpp">
<Filter>System\Ps2\GS\Renderers\Hardware</Filter>
</ClCompile>
<ClCompile Include="GS\Renderers\HW\GSTextureCache.cpp">
<Filter>System\Ps2\GS\Renderers\Hardware</Filter>
</ClCompile>