// Patch summary: adds GSSetupPrimCodeGenerator.all.cpp/.h (class GSSetupPrimCodeGenerator2), makes the GSSetupPrimCodeGenerator constructor delegate to it instead of calling Generate_AVX2/Generate_AVX/Generate_SSE, and lists the new files in CMakeLists.txt, pcsx2.vcxproj and pcsx2.vcxproj.filters.
diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt index f6a723d3da..96f4b775d7 100644 --- a/pcsx2/CMakeLists.txt +++ b/pcsx2/CMakeLists.txt @@ -649,6 +649,7 @@ set(pcsx2GSSources GS/Renderers/SW/GSRasterizer.cpp GS/Renderers/SW/GSRendererSW.cpp GS/Renderers/SW/GSSetupPrimCodeGenerator.cpp + GS/Renderers/SW/GSSetupPrimCodeGenerator.all.cpp GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.cpp GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx.cpp GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx2.cpp @@ -719,6 +720,7 @@ set(pcsx2GSHeaders GS/Renderers/SW/GSRendererSW.h GS/Renderers/SW/GSScanlineEnvironment.h GS/Renderers/SW/GSSetupPrimCodeGenerator.h + GS/Renderers/SW/GSSetupPrimCodeGenerator.all.h GS/Renderers/SW/GSTextureCacheSW.h GS/Renderers/SW/GSTextureSW.h GS/Renderers/SW/GSVertexSW.h diff --git a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.all.cpp b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.all.cpp new file mode 100644 index 0000000000..b646813732 --- /dev/null +++ b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.all.cpp @@ -0,0 +1,566 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2021 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +#include "PrecompiledHeader.h" +#include "GS/GS_types.h" +#include "GSSetupPrimCodeGenerator.all.h" +#include "GSVertexSW.h" + +using namespace Xbyak; + +#define _rip_local(field) ((is32 || m_rip) ? 
ptr[rip + (char*)&m_local.field] : ptr[_m_local + OFFSETOF(GSScanlineLocalData, field)]) + +#define _64_m_local _64_t0 + +/// On AVX, does a v-prefixed separate destination operation +/// On SSE, moves src1 into dst using movdqa, then does the operation +#define THREEARG(operation, dst, src1, ...) \ + do \ + { \ + if (hasAVX) \ + { \ + v##operation(dst, src1, __VA_ARGS__); \ + } \ + else \ + { \ + movdqa(dst, src1); \ + operation(dst, __VA_ARGS__); \ + } \ + } while (0) + +#if _M_SSE >= 0x501 + #define _rip_local_d(x) _rip_local(d8.x) + #define _rip_local_d_p(x) _rip_local_d(p.x) +#else + #define _rip_local_d(x) _rip_local(d4.x) + #define _rip_local_d_p(x) _rip_local_d(x) +#endif + +/** Binds m_local to the GSScanlineLocalData that param points to, selects the argument and temporary registers for the _WIN32 or non-Windows layout (32- or 64-bit via is64), copies key into m_sel and derives the m_en.z, m_en.f, m_en.t and m_en.c enable bits from the selector. */ GSSetupPrimCodeGenerator2::GSSetupPrimCodeGenerator2(Xbyak::CodeGenerator* base, CPUInfo cpu, void* param, uint64 key) + : _parent(base, cpu) + , m_local(*(GSScanlineLocalData*)param) + , m_rip(false), many_regs(false) + // On x86 arg registers are very temporary but on x64 they aren't, so on x86 some registers overlap +#ifdef _WIN32 + , _64_vertex(is64 ? rcx : r8) + , _index(is64 ? rdx : rcx) + , _dscan(is64 ? r8 : rdx) + , _64_t0(r9), t1(is64 ? r10 : rcx) +#else + , _64_vertex(is64 ? rdi : r8) + , _index(is64 ? rsi : rcx) + , _dscan(rdx) + , _64_t0(is64 ? rcx : r8), t1(is64 ? r8 : rcx) +#endif + , _m_local(chooseLocal(&m_local, _64_m_local)) +{ + m_sel.key = key; + + m_en.z = m_sel.zb ? 1 : 0; + m_en.f = m_sel.fb && m_sel.fge ? 1 : 0; + m_en.t = m_sel.fb && m_sel.tfx != TFX_NONE ? 1 : 0; + m_en.c = m_sel.fb && !(m_sel.tfx == TFX_DECAL && m_sel.tcc) ? 
1 : 0; +} + +/** Loads 128 bits from mem into reg: emits vbroadcastf128 when SETUP_PRIM_USING_YMM is set, otherwise a plain movaps. */ void GSSetupPrimCodeGenerator2::broadcastf128(const XYm& reg, const Address& mem) +{ +#if SETUP_PRIM_USING_YMM + vbroadcastf128(reg, mem); +#else + movaps(reg, mem); +#endif +} + +/** Emits the whole routine. Under _WIN64 it saves and later restores needs_saving vector registers starting at xmm6 on the stack. It loads the address of m_local into _64_m_local when running 64-bit without m_rip, preloads the shift constants from g_const into vector registers 3 and up when needs_shift is set, then emits the depth, texture and colour parts followed by vzeroupper (YMM only) and ret. */ void GSSetupPrimCodeGenerator2::Generate() +{ + // Technically we just need the delta < 2GB + m_rip = (size_t)&m_local < 0x80000000 && (size_t)getCurr() < 0x80000000; + + bool needs_shift = (m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip; + many_regs = is64 && isYmm && !m_sel.notest && needs_shift; + +#ifdef _WIN64 + int needs_saving = many_regs ? 6 : m_sel.notest ? 0 : 2; + if (needs_saving) + { + sub(rsp, 8 + 16 * needs_saving); + for (int i = 0; i < needs_saving; i++) + { + movdqa(ptr[rsp + i * 16], Xmm(i + 6)); + } + } +#endif + + if (is64 && !m_rip) + mov(_64_m_local, (size_t)&m_local); + + if (needs_shift) + { + if (is32) + mov(_dscan, ptr[rsp + _32_dscan]); + + if (isXmm) + mov(rax, (size_t)g_const->m_shift_128b); + else + mov(rax, (size_t)g_const->m_shift_256b); + + for (int i = 0; i < (m_sel.notest ? 2 : many_regs ? 
9 : 5); i++) + { + movaps(XYm(3 + i), ptr[rax + i * vecsize]); + } + } + + if (isXmm) + Depth_XMM(); + else + Depth_YMM(); + + Texture(); + + Color(); + +#ifdef _WIN64 + if (needs_saving) + { + for (int i = 0; i < needs_saving; i++) + { + movdqa(Xmm(i + 6), ptr[rsp + i * 16]); + } + add(rsp, 8 + 16 * needs_saving); + } +#endif + if (isYmm) + vzeroupper(); + ret(); +} + +/** Emits the depth and fog setup for 128-bit vectors; emits nothing unless m_en.z or m_en.f is set. For non-sprite primitives it stores the step scaled by register 3 and the per-d[i] z and f rows computed from dscan.p; for sprites it stores p.f and p.z taken from vertex[index[1]]. */ void GSSetupPrimCodeGenerator2::Depth_XMM() +{ + if (!m_en.z && !m_en.f) + { + return; + } + + if (m_sel.prim != GS_SPRITE_CLASS) + { + // GSVector4 p = dscan.p; + + + movaps(xmm0, ptr[_dscan + offsetof(GSVertexSW, p)]); + + if (m_en.f) + { + // GSVector4 df = p.wwww(); + + THREEARG(shufps, xmm1, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3)); + + // m_local.d4.f = GSVector4i(df * 4.0f).xxzzlh(); + + THREEARG(mulps, xmm2, xmm1, xmm3); + cvttps2dq(xmm2, xmm2); + pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); + pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); + movdqa(_rip_local_d_p(f), xmm2); + + for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) + { + // m_local.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh(); + + THREEARG(mulps, xmm2, xmm1, XYm(4 + i)); + cvttps2dq(xmm2, xmm2); + pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); + pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); + movdqa(_rip_local(d[i].f), xmm2); + } + } + + if (m_en.z) + { + // GSVector4 dz = p.zzzz(); + + shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); + + // m_local.d4.z = dz * 4.0f; + + THREEARG(mulps, xmm1, xmm0, xmm3); + movdqa(_rip_local_d_p(z), xmm1); + + for (int i = 0; i < (m_sel.notest ? 
1 : 4); i++) + { + // m_local.d[i].z = dz * m_shift[i]; + + THREEARG(mulps, xmm1, xmm0, XYm(4 + i)); + movdqa(_rip_local(d[i].z), xmm1); + } + } + } + else + { + // GSVector4 p = vertex[index[1]].p; + + if (is32) + mov(_index, ptr[rsp + _32_index]); + mov(eax, ptr[_index + sizeof(uint32) * 1]); + shl(eax, 6); // * sizeof(GSVertexSW) + if (is64) + add(rax, _64_vertex); + else + add(rax, ptr[rsp + _32_vertex]); + + if (m_en.f) + { + // m_local.p.f = GSVector4i(p).zzzzh().zzzz(); + movaps(xmm0, ptr[rax + offsetof(GSVertexSW, p)]); + + cvttps2dq(xmm1, xmm0); + pshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); + pshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); + movdqa(_rip_local(p.f), xmm1); + } + + if (m_en.z) + { + // uint32 z is bypassed in t.w + + movdqa(xmm0, ptr[rax + offsetof(GSVertexSW, t)]); + pshufd(xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3)); + movdqa(_rip_local(p.z), xmm0); + } + } +} + +/** Emits the depth and fog setup for 256-bit vectors; emits nothing unless m_en.z or m_en.f is set. For non-sprite primitives it stores the scalar p.z and p.f steps extracted from dscan.p times register 3 and the per-d[i] z and f rows; for sprites it stores p.f and p.z taken from vertex[index[1]]. */ void GSSetupPrimCodeGenerator2::Depth_YMM() +{ + if (!m_en.z && !m_en.f) + { + return; + } + + if (m_sel.prim != GS_SPRITE_CLASS) + { + // GSVector4 dp8 = dscan.p * GSVector4::broadcast32(&shift[0]); + + broadcastf128(xym0, ptr[_dscan + offsetof(GSVertexSW, p)]); + + vmulps(ymm1, ymm0, ymm3); + + if (m_en.z) + { + // m_local.d8.p.z = dp8.extract32<2>(); + + extractps(_rip_local_d_p(z), xmm1, 2); + + // GSVector8 dz = GSVector8(dscan.p).zzzz(); + + vshufps(ymm2, ymm0, ymm0, _MM_SHUFFLE(2, 2, 2, 2)); + } + + if (m_en.f) + { + // m_local.d8.p.f = GSVector4i(dp8).extract32<3>(); + + cvtps2dq(ymm1, ymm1); + pextrd(_rip_local_d_p(f), xmm1, 3); + + // GSVector8 df = GSVector8(dscan.p).wwww(); + + vshufps(ymm1, ymm0, ymm0, _MM_SHUFFLE(3, 3, 3, 3)); + } + + for (int i = 0; i < (m_sel.notest ? 
1 : dsize); i++) + { + if (m_en.z) + { + // m_local.d[i].z = dz * shift[1 + i]; + + // Save a byte in the encoding for ymm8-11 by swapping with ymm2 (multiplication is commutative) + if (i < 4 || many_regs) + vmulps(ymm0, Ymm(4 + i), ymm2); + else + vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]); + movaps(_rip_local(d[i].z), ymm0); + } + + if (m_en.f) + { + // m_local.d[i].f = GSVector8i(df * m_shift[i]).xxzzlh(); + + if (i < 4 || many_regs) + vmulps(ymm0, Ymm(4 + i), ymm1); + else + vmulps(ymm0, ymm1, ptr[g_const->m_shift_256b[i + 1]]); + cvttps2dq(ymm0, ymm0); + pshuflw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0)); + pshufhw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0)); + movdqa(_rip_local(d[i].f), ymm0); + } + } + } + else + { + // GSVector4 p = vertex[index[1]].p; + + if (is32) + mov(_index, ptr[rsp + _32_index]); + mov(eax, ptr[_index + sizeof(uint32) * 1]); + shl(eax, 6); // * sizeof(GSVertexSW) + if (is64) + add(rax, _64_vertex); + else + add(rax, ptr[rsp + _32_vertex]); + + if (m_en.f) + { + // m_local.p.f = GSVector4i(vertex[index[1]].p).extract32<3>(); + + movaps(xmm0, ptr[rax + offsetof(GSVertexSW, p)]); + cvttps2dq(xmm0, xmm0); + pextrd(_rip_local(p.f), xmm0, 3); + } + + if (m_en.z) + { + // m_local.p.z = vertex[index[1]].t.u32[3]; // uint32 z is bypassed in t.w + + mov(t1.cvt32(), ptr[rax + offsetof(GSVertexSW, t.w)]); + mov(_rip_local(p.z), t1.cvt32()); + } + } +} + +/** Emits the texture coordinate setup; emits nothing unless m_en.t is set. Stores the stq step computed from dscan.t times register 3, then the per-d[i] s and t rows (converted to integers when m_sel.fst is set) plus the q row when m_sel.fst is clear. */ void GSSetupPrimCodeGenerator2::Texture() +{ + if (!m_en.t) + { + return; + } + + // GSVector4 t = dscan.t; + + broadcastf128(xym0, ptr[_dscan + offsetof(GSVertexSW, t)]); + + THREEARG(mulps, xmm1, xmm0, xmm3); + + if (m_sel.fst) + { + // m_local.d4.stq = GSVector4i(t * 4.0f); + + cvttps2dq(xmm1, xmm1); + + movdqa(_rip_local_d(stq), xmm1); + } + else + { + // m_local.d4.stq = t * 4.0f; + + movaps(_rip_local_d(stq), xmm1); + } + + for (int j = 0, k = m_sel.fst ? 
2 : 3; j < k; j++) + { + // GSVector4 ds = t.xxxx(); + // GSVector4 dt = t.yyyy(); + // GSVector4 dq = t.zzzz(); + + THREEARG(shufps, xym1, xym0, xym0, _MM_SHUFFLE(j, j, j, j)); + + for (int i = 0; i < (m_sel.notest ? 1 : dsize); i++) + { + // GSVector4 v = ds/dt * m_shift[i]; + + if (i < 4 || many_regs) + THREEARG(mulps, xym2, XYm(4 + i), xym1); + else + vmulps(ymm2, ymm1, ptr[g_const->m_shift_256b[i + 1]]); + + if (m_sel.fst) + { + // m_local.d[i].s/t = GSVector4i(v); + + cvttps2dq(xym2, xym2); + + switch (j) + { + case 0: movdqa(_rip_local(d[i].s), xym2); break; + case 1: movdqa(_rip_local(d[i].t), xym2); break; + } + } + else + { + // m_local.d[i].s/t/q = v; + + switch (j) + { + case 0: movaps(_rip_local(d[i].s), xym2); break; + case 1: movaps(_rip_local(d[i].t), xym2); break; + case 2: movaps(_rip_local(d[i].q), xym2); break; + } + } + } + } +} + +/** Emits the colour setup; emits nothing unless m_en.c is set. With m_sel.iip it stores the c step and the per-d[i] rb and ga rows derived from dscan.c; otherwise it stores a single c.rb and c.ga pair taken from the vertex selected by the primitive class (index last). */ void GSSetupPrimCodeGenerator2::Color() +{ + if (!m_en.c) + { + return; + } + + if (m_sel.iip) + { + // GSVector4 c = dscan.c; + + broadcastf128(xym0, ptr[_dscan + offsetof(GSVertexSW, c)]); + + // m_local.d4.c = GSVector4i(c * 4.0f).xzyw().ps32(); + + THREEARG(mulps, xmm1, xmm0, xmm3); + cvttps2dq(xmm1, xmm1); + pshufd(xmm1, xmm1, _MM_SHUFFLE(3, 1, 2, 0)); + packssdw(xmm1, xmm1); + if (isXmm) + movdqa(_rip_local_d(c), xmm1); + else + movq(_rip_local_d(c), xmm1); + + // xym3 is not needed anymore + + // GSVector4 dr = c.xxxx(); + // GSVector4 db = c.zzzz(); + + THREEARG(shufps, xym2, xym0, xym0, _MM_SHUFFLE(0, 0, 0, 0)); + THREEARG(shufps, xym3, xym0, xym0, _MM_SHUFFLE(2, 2, 2, 2)); + + for (int i = 0; i < (m_sel.notest ? 
1 : dsize); i++) + { + // GSVector4i r = GSVector4i(dr * m_shift[i]).ps32(); + + if (i < 4 || many_regs) + THREEARG(mulps, xym0, XYm(4 + i), xym2); + else + vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]); + cvttps2dq(xym0, xym0); + packssdw(xym0, xym0); + + // GSVector4i b = GSVector4i(db * m_shift[i]).ps32(); + + if (i < 4 || many_regs) + THREEARG(mulps, xym1, XYm(4 + i), xym3); + else + vmulps(ymm1, ymm3, ptr[g_const->m_shift_256b[i + 1]]); + cvttps2dq(xym1, xym1); + packssdw(xym1, xym1); + + // m_local.d[i].rb = r.upl16(b); + + punpcklwd(xym0, xym1); + movdqa(_rip_local(d[i].rb), xym0); + } + + // GSVector4 c = dscan.c; + + broadcastf128(xym0, ptr[_dscan + offsetof(GSVertexSW, c)]); // not enough regs, have to reload it + + // GSVector4 dg = c.yyyy(); + // GSVector4 da = c.wwww(); + + THREEARG(shufps, xym2, xym0, xym0, _MM_SHUFFLE(1, 1, 1, 1)); + THREEARG(shufps, xym3, xym0, xym0, _MM_SHUFFLE(3, 3, 3, 3)); + + for (int i = 0; i < (m_sel.notest ? 1 : dsize); i++) + { + // GSVector4i g = GSVector4i(dg * m_shift[i]).ps32(); + + if (i < 4 || many_regs) + THREEARG(mulps, xym0, XYm(4 + i), xym2); + else + vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]); + cvttps2dq(xym0, xym0); + packssdw(xym0, xym0); + + // GSVector4i a = GSVector4i(da * m_shift[i]).ps32(); + + if (i < 4 || many_regs) + THREEARG(mulps, xym1, XYm(4 + i), xym3); + else + vmulps(ymm1, ymm3, ptr[g_const->m_shift_256b[i + 1]]); + cvttps2dq(xym1, xym1); + packssdw(xym1, xym1); + + // m_local.d[i].ga = g.upl16(a); + + punpcklwd(xym0, xym1); + movdqa(_rip_local(d[i].ga), xym0); + } + } + else + { + // GSVector4i c = GSVector4i(vertex[index[last]].c); + + int last = 0; + + switch (m_sel.prim) + { + case GS_POINT_CLASS: last = 0; break; + case GS_LINE_CLASS: last = 1; break; + case GS_TRIANGLE_CLASS: last = 2; break; + case GS_SPRITE_CLASS: last = 1; break; + } + + if (!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth() + { + 
if (is32) + mov(_index, ptr[rsp + _32_index]); + mov(eax, ptr[_index + sizeof(uint32) * last]); + shl(eax, 6); // * sizeof(GSVertexSW) + if (is64) + add(rax, _64_vertex); + else + add(rax, ptr[rsp + _32_vertex]); + } + + if (isXmm) + { + cvttps2dq(xmm0, ptr[rax + offsetof(GSVertexSW, c)]); + } + else + { + vbroadcasti128(ymm0, ptr[rax + offsetof(GSVertexSW, c)]); + cvttps2dq(ymm0, ymm0); + } + + // c = c.upl16(c.zwxy()); + + pshufd(xym1, xym0, _MM_SHUFFLE(1, 0, 3, 2)); + punpcklwd(xym0, xym1); + + // if(!tme) c = c.srl16(7); + + if (m_sel.tfx == TFX_NONE) + { + psrlw(xym0, 7); + } + + // m_local.c.rb = c.xxxx(); + // m_local.c.ga = c.zzzz(); + + pshufd(xym1, xym0, _MM_SHUFFLE(0, 0, 0, 0)); + pshufd(xym2, xym0, _MM_SHUFFLE(2, 2, 2, 2)); + + movdqa(_rip_local(c.rb), xym1); + movdqa(_rip_local(c.ga), xym2); + } +} diff --git a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.all.h b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.all.h new file mode 100644 index 0000000000..df68e9f479 --- /dev/null +++ b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.all.h @@ -0,0 +1,83 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2021 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . 
+ */ + +#pragma once + +#include "GSScanlineEnvironment.h" +#include "GSNewCodeGenerator.h" + +#if _M_SSE >= 0x501 + #define SETUP_PRIM_VECTOR_REGISTER Xbyak::Ymm + #define SETUP_PRIM_USING_XMM 0 + #define SETUP_PRIM_USING_YMM 1 +#else + #define SETUP_PRIM_VECTOR_REGISTER Xbyak::Xmm + #define SETUP_PRIM_USING_XMM 1 + #define SETUP_PRIM_USING_YMM 0 +#endif + +/** Emits the setup-prim routine through GSNewCodeGenerator; the vector register type XYm is Xbyak::Ymm when _M_SSE >= 0x501 and Xbyak::Xmm otherwise. */ class GSSetupPrimCodeGenerator2 : public GSNewCodeGenerator +{ + using _parent = GSNewCodeGenerator; + using XYm = SETUP_PRIM_VECTOR_REGISTER; + + using Xmm = Xbyak::Xmm; + using Ymm = Xbyak::Ymm; + + /// On x86-64 we reserve a bunch of GPRs for holding addresses of locals that would otherwise be hard to reach + /// On x86-32 the same values are just raw 32-bit addresses + using LocalAddr = Choose3264::type; + + constexpr static bool isXmm = std::is_same::value; + constexpr static bool isYmm = std::is_same::value; + constexpr static int vecsize = isXmm ? 16 : 32; + + constexpr static int dsize = isXmm ? 4 : 8; + + constexpr static int _32_args = 0; + constexpr static int _invalid = 0xaaaaaaaa; + constexpr static int _32_vertex = is64 ? _invalid : _32_args + 4; + constexpr static int _32_index = is64 ? _invalid : _32_args + 8; + constexpr static int _32_dscan = is64 ? 
_invalid : _32_args + 12; + + GSScanlineSelector m_sel; + GSScanlineLocalData& m_local; + bool m_rip; + bool many_regs; + + struct {uint32 z:1, f:1, t:1, c:1;} m_en; + + const XYm xym0{0}, xym1{1}, xym2{2}, xym3{3}, xym4{4}, xym5{5}, xym6{6}, xym7{7}, xym8{8}, xym9{9}, xym10{10}, xym11{11}, xym12{12}, xym13{13}, xym14{14}, xym15{15}; + const AddressReg _64_vertex, _index, _dscan, _64_t0, t1; + const LocalAddr _m_local; + /// Returns the first arg on 32-bit, second on 64-bit + static LocalAddr chooseLocal(const void* addr32, AddressReg reg64) + { + return choose3264((size_t)addr32, reg64); + } + +public: + GSSetupPrimCodeGenerator2(Xbyak::CodeGenerator* base, CPUInfo cpu, void* param, uint64 key); + void Generate(); + +private: + /// Broadcast 128 bits of floats from memory to the whole register, whatever size that register might be + void broadcastf128(const XYm& reg, const Xbyak::Address& mem); + + void Depth_XMM(); + void Depth_YMM(); + void Texture(); + void Color(); +}; diff --git a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.cpp b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.cpp index f001fcb782..4d90992ead 100644 --- a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.cpp +++ b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.cpp @@ -15,6 +15,7 @@ #include "PrecompiledHeader.h" #include "GSSetupPrimCodeGenerator.h" +#include "GSSetupPrimCodeGenerator.all.h" using namespace Xbyak; @@ -30,12 +31,5 @@ GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, uint64 key, void m_en.t = m_sel.fb && m_sel.tfx != TFX_NONE ? 1 : 0; m_en.c = m_sel.fb && !(m_sel.tfx == TFX_DECAL && m_sel.tcc) ? 
1 : 0; -#if _M_SSE >= 0x501 - Generate_AVX2(); -#else - if (m_cpu.has(util::Cpu::tAVX)) - Generate_AVX(); - else - Generate_SSE(); -#endif + GSSetupPrimCodeGenerator2(this, CPUInfo(m_cpu), param, key).Generate(); } diff --git a/pcsx2/pcsx2.vcxproj b/pcsx2/pcsx2.vcxproj index c3c7998e79..64131f397b 100644 --- a/pcsx2/pcsx2.vcxproj +++ b/pcsx2/pcsx2.vcxproj @@ -491,6 +491,7 @@ + @@ -855,6 +856,7 @@ + diff --git a/pcsx2/pcsx2.vcxproj.filters b/pcsx2/pcsx2.vcxproj.filters index 9d897cf908..87288df283 100644 --- a/pcsx2/pcsx2.vcxproj.filters +++ b/pcsx2/pcsx2.vcxproj.filters @@ -1544,6 +1544,9 @@ System\Ps2\GS\Renderers\Software + + System\Ps2\GS\Renderers\Software + System\Ps2\GS\Renderers\Software @@ -2646,6 +2649,9 @@ System\Ps2\GS\Renderers\Software + + System\Ps2\GS\Renderers\Software + System\Ps2\GS\Renderers\Software