diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt
index 96f4b775d7..3e9cda2898 100644
--- a/pcsx2/CMakeLists.txt
+++ b/pcsx2/CMakeLists.txt
@@ -650,12 +650,6 @@ set(pcsx2GSSources
GS/Renderers/SW/GSRendererSW.cpp
GS/Renderers/SW/GSSetupPrimCodeGenerator.cpp
GS/Renderers/SW/GSSetupPrimCodeGenerator.all.cpp
- GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.cpp
- GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx.cpp
- GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx2.cpp
- GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.cpp
- GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.avx.cpp
- GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.avx2.cpp
GS/Renderers/SW/GSTextureCacheSW.cpp
GS/Renderers/SW/GSTextureSW.cpp
GS/Renderers/OpenGL/GLLoader.cpp
diff --git a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.h b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.h
index 2bda1b0777..121e6c26c9 100644
--- a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.h
+++ b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.h
@@ -32,23 +32,6 @@ class GSSetupPrimCodeGenerator : public GSCodeGenerator
uint32 z : 1, f : 1, t : 1, c : 1;
} m_en;
-#if _M_SSE < 0x501
- void Generate_SSE();
- void Depth_SSE();
- void Texture_SSE();
- void Color_SSE();
-
- void Generate_AVX();
- void Depth_AVX();
- void Texture_AVX();
- void Color_AVX();
-#else
- void Generate_AVX2();
- void Depth_AVX2();
- void Texture_AVX2();
- void Color_AVX2();
-#endif
-
public:
GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize);
};
diff --git a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx.cpp b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx.cpp
deleted file mode 100644
index d965b49bc8..0000000000
--- a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx.cpp
+++ /dev/null
@@ -1,365 +0,0 @@
-/* PCSX2 - PS2 Emulator for PCs
- * Copyright (C) 2002-2021 PCSX2 Dev Team
- *
- * PCSX2 is free software: you can redistribute it and/or modify it under the terms
- * of the GNU Lesser General Public License as published by the Free Software Found-
- * ation, either version 3 of the License, or (at your option) any later version.
- *
- * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
- * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along with PCSX2.
- * If not, see .
- */
-
-#include "PrecompiledHeader.h"
-#include "GSSetupPrimCodeGenerator.h"
-#include "GSVertexSW.h"
-#include "GS/GS_codegen.h"
-
-#if _M_SSE < 0x501 && (defined(_M_AMD64) || defined(_WIN64))
-
-#define _rip_local(field) (m_rip ? ptr[rip + &m_local.field] : ptr[t0 + offsetof(GSScanlineLocalData, field)])
-#define _rip_local_v(field, offset) (m_rip ? ptr[rip + &m_local.field] : ptr[t0 + offset])
-
-void GSSetupPrimCodeGenerator::Generate_AVX()
-{
- // Technically we just need the delta < 2GB
- m_rip = (size_t)&m_local < 0x80000000 && (size_t)getCurr() < 0x80000000;
-
-#ifdef _WIN64
- sub(rsp, 8 + 2 * 16);
-
- vmovdqa(ptr[rsp + 0], xmm6);
- vmovdqa(ptr[rsp + 16], xmm7);
-#endif
-
- if (!m_rip)
- mov(t0, (size_t)&m_local);
-
- if ((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip)
- {
- mov(rax, (size_t)g_const->m_shift_128b);
-
- for (int i = 0; i < (m_sel.notest ? 2 : 5); i++)
- {
- vmovaps(Xmm(3 + i), ptr[rax + i * 16]);
- }
- }
-
- Depth_AVX();
-
- Texture_AVX();
-
- Color_AVX();
-
-#ifdef _WIN64
- vmovdqa(xmm6, ptr[rsp + 0]);
- vmovdqa(xmm7, ptr[rsp + 16]);
-
- add(rsp, 8 + 2 * 16);
-#endif
-
- ret();
-}
-
-void GSSetupPrimCodeGenerator::Depth_AVX()
-{
- if (!m_en.z && !m_en.f)
- {
- return;
- }
-
- if (m_sel.prim != GS_SPRITE_CLASS)
- {
- // GSVector4 p = dscan.p;
-
- vmovaps(xmm0, ptr[a2 + offsetof(GSVertexSW, p)]);
-
- if (m_en.f)
- {
- // GSVector4 df = p.wwww();
-
- vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
-
- // m_local.d4.f = GSVector4i(df * 4.0f).xxzzlh();
-
- vmulps(xmm2, xmm1, xmm3);
- vcvttps2dq(xmm2, xmm2);
- vpshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
- vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
- vmovdqa(_rip_local(d4.f), xmm2);
-
- for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
- {
- // m_local.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh();
-
- vmulps(xmm2, xmm1, Xmm(4 + i));
- vcvttps2dq(xmm2, xmm2);
- vpshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
- vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
-
- const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].f) + (i * sizeof(GSScanlineLocalData::d[0]));
- vmovdqa(_rip_local_v(d[i].f, variableOffset), xmm2);
- }
- }
-
- if (m_en.z)
- {
- // GSVector4 dz = p.zzzz();
-
- vshufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
-
- // m_local.d4.z = dz * 4.0f;
-
- vmulps(xmm1, xmm0, xmm3);
- vmovdqa(_rip_local(d4.z), xmm1);
-
- for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
- {
- // m_local.d[i].z = dz * m_shift[i];
-
- vmulps(xmm1, xmm0, Xmm(4 + i));
-
- const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].z) + (i * sizeof(GSScanlineLocalData::d[0]));
- vmovdqa(_rip_local_v(d[i].z, variableOffset), xmm1);
- }
- }
- }
- else
- {
- // GSVector4 p = vertex[index[1]].p;
-
- mov(eax, ptr[a1 + sizeof(uint32) * 1]);
- shl(eax, 6); // * sizeof(GSVertexSW)
- add(rax, a0);
-
- if (m_en.f)
- {
- // m_local.p.f = GSVector4i(p).zzzzh().zzzz();
- vmovaps(xmm0, ptr[rax + offsetof(GSVertexSW, p)]);
-
- vcvttps2dq(xmm1, xmm0);
- vpshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
- vpshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
- vmovdqa(_rip_local(p.f), xmm1);
- }
-
- if (m_en.z)
- {
- // uint32 z is bypassed in t.w
-
- vmovdqa(xmm0, ptr[rax + offsetof(GSVertexSW, t)]);
- vpshufd(xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
- vmovdqa(_rip_local(p.z), xmm0);
- }
- }
-}
-
-void GSSetupPrimCodeGenerator::Texture_AVX()
-{
- if (!m_en.t)
- {
- return;
- }
-
- // GSVector4 t = dscan.t;
-
- vmovaps(xmm0, ptr[a2 + offsetof(GSVertexSW, t)]);
-
- vmulps(xmm1, xmm0, xmm3);
-
- if (m_sel.fst)
- {
- // m_local.d4.stq = GSVector4i(t * 4.0f);
-
- vcvttps2dq(xmm1, xmm1);
-
- vmovdqa(_rip_local(d4.stq), xmm1);
- }
- else
- {
- // m_local.d4.stq = t * 4.0f;
-
- vmovaps(_rip_local(d4.stq), xmm1);
- }
-
- for (int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
- {
- // GSVector4 ds = t.xxxx();
- // GSVector4 dt = t.yyyy();
- // GSVector4 dq = t.zzzz();
-
- vshufps(xmm1, xmm0, xmm0, (uint8)_MM_SHUFFLE(j, j, j, j));
-
- for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
- {
- // GSVector4 v = ds/dt * m_shift[i];
-
- vmulps(xmm2, xmm1, Xmm(4 + i));
-
- if (m_sel.fst)
- {
- // m_local.d[i].s/t = GSVector4i(v);
-
- vcvttps2dq(xmm2, xmm2);
-
- const size_t variableOffsetS = offsetof(GSScanlineLocalData, d[0].s) + (i * sizeof(GSScanlineLocalData::d[0]));
- const size_t variableOffsetT = offsetof(GSScanlineLocalData, d[0].t) + (i * sizeof(GSScanlineLocalData::d[0]));
-
- switch (j)
- {
- case 0: vmovdqa(_rip_local_v(d[i].s, variableOffsetS), xmm2); break;
- case 1: vmovdqa(_rip_local_v(d[i].t, variableOffsetT), xmm2); break;
- }
- }
- else
- {
- // m_local.d[i].s/t/q = v;
-
- const size_t variableOffsetS = offsetof(GSScanlineLocalData, d[0].s) + (i * sizeof(GSScanlineLocalData::d[0]));
- const size_t variableOffsetT = offsetof(GSScanlineLocalData, d[0].t) + (i * sizeof(GSScanlineLocalData::d[0]));
- const size_t variableOffsetQ = offsetof(GSScanlineLocalData, d[0].q) + (i * sizeof(GSScanlineLocalData::d[0]));
-
- switch (j)
- {
- case 0: vmovaps(_rip_local_v(d[i].s, variableOffsetS), xmm2); break;
- case 1: vmovaps(_rip_local_v(d[i].t, variableOffsetT), xmm2); break;
- case 2: vmovaps(_rip_local_v(d[i].q, variableOffsetQ), xmm2); break;
- }
- }
- }
- }
-}
-
-void GSSetupPrimCodeGenerator::Color_AVX()
-{
- if (!m_en.c)
- {
- return;
- }
-
- if (m_sel.iip)
- {
- // GSVector4 c = dscan.c;
-
- vmovaps(xmm0, ptr[a2 + offsetof(GSVertexSW, c)]);
-
- // m_local.d4.c = GSVector4i(c * 4.0f).xzyw().ps32();
-
- vmulps(xmm1, xmm0, xmm3);
- vcvttps2dq(xmm1, xmm1);
- vpshufd(xmm1, xmm1, _MM_SHUFFLE(3, 1, 2, 0));
- vpackssdw(xmm1, xmm1);
- vmovdqa(_rip_local(d4.c), xmm1);
-
- // xmm3 is not needed anymore
-
- // GSVector4 dr = c.xxxx();
- // GSVector4 db = c.zzzz();
-
- vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
- vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
-
- for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
- {
- // GSVector4i r = GSVector4i(dr * m_shift[i]).ps32();
-
- vmulps(xmm0, xmm2, Xmm(4 + i));
- vcvttps2dq(xmm0, xmm0);
- vpackssdw(xmm0, xmm0);
-
- // GSVector4i b = GSVector4i(db * m_shift[i]).ps32();
-
- vmulps(xmm1, xmm3, Xmm(4 + i));
- vcvttps2dq(xmm1, xmm1);
- vpackssdw(xmm1, xmm1);
-
- // m_local.d[i].rb = r.upl16(b);
-
- vpunpcklwd(xmm0, xmm1);
-
- const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].rb) + (i * sizeof(GSScanlineLocalData::d[0]));
- vmovdqa(_rip_local_v(d[i].rb, variableOffset), xmm0);
- }
-
- // GSVector4 c = dscan.c;
-
- vmovaps(xmm0, ptr[a2 + offsetof(GSVertexSW, c)]); // not enough regs, have to reload it
-
- // GSVector4 dg = c.yyyy();
- // GSVector4 da = c.wwww();
-
- vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
- vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
-
- for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
- {
- // GSVector4i g = GSVector4i(dg * m_shift[i]).ps32();
-
- vmulps(xmm0, xmm2, Xmm(4 + i));
- vcvttps2dq(xmm0, xmm0);
- vpackssdw(xmm0, xmm0);
-
- // GSVector4i a = GSVector4i(da * m_shift[i]).ps32();
-
- vmulps(xmm1, xmm3, Xmm(4 + i));
- vcvttps2dq(xmm1, xmm1);
- vpackssdw(xmm1, xmm1);
-
- // m_local.d[i].ga = g.upl16(a);
-
- vpunpcklwd(xmm0, xmm1);
-
- const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].ga) + (i * sizeof(GSScanlineLocalData::d[0]));
- vmovdqa(_rip_local_v(d[i].ga, variableOffset), xmm0);
- }
- }
- else
- {
- // GSVector4i c = GSVector4i(vertex[index[last].c);
-
- int last = 0;
-
- switch (m_sel.prim)
- {
- case GS_POINT_CLASS: last = 0; break;
- case GS_LINE_CLASS: last = 1; break;
- case GS_TRIANGLE_CLASS: last = 2; break;
- case GS_SPRITE_CLASS: last = 1; break;
- }
-
- if (!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth()
- {
- mov(eax, ptr[a1 + sizeof(uint32) * last]);
- shl(eax, 6); // * sizeof(GSVertexSW)
- add(rax, a0);
- }
-
- vcvttps2dq(xmm0, ptr[rax + offsetof(GSVertexSW, c)]);
-
- // c = c.upl16(c.zwxy());
-
- vpshufd(xmm1, xmm0, _MM_SHUFFLE(1, 0, 3, 2));
- vpunpcklwd(xmm0, xmm1);
-
- // if(!tme) c = c.srl16(7);
-
- if (m_sel.tfx == TFX_NONE)
- {
- vpsrlw(xmm0, 7);
- }
-
- // m_local.c.rb = c.xxxx();
- // m_local.c.ga = c.zzzz();
-
- vpshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
- vpshufd(xmm2, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
-
- vmovdqa(_rip_local(c.rb), xmm1);
- vmovdqa(_rip_local(c.ga), xmm2);
- }
-}
-
-#endif
diff --git a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx2.cpp b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx2.cpp
deleted file mode 100644
index 916f4e682b..0000000000
--- a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx2.cpp
+++ /dev/null
@@ -1,368 +0,0 @@
-/* PCSX2 - PS2 Emulator for PCs
- * Copyright (C) 2002-2021 PCSX2 Dev Team
- *
- * PCSX2 is free software: you can redistribute it and/or modify it under the terms
- * of the GNU Lesser General Public License as published by the Free Software Found-
- * ation, either version 3 of the License, or (at your option) any later version.
- *
- * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
- * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along with PCSX2.
- * If not, see .
- */
-
-#include "PrecompiledHeader.h"
-#include "GSSetupPrimCodeGenerator.h"
-#include "GSVertexSW.h"
-#include "GS/GS_codegen.h"
-
-#if _M_SSE >= 0x501 && (defined(_M_AMD64) || defined(_WIN64))
-
-#define _rip_local(field) (m_rip ? ptr[rip + &m_local.field] : ptr[t0 + offsetof(GSScanlineLocalData, field)])
-#define _rip_local_v(field, offset) (m_rip ? ptr[rip + &m_local.field] : ptr[t0 + offset])
-
-#define _m_shift(i) (Ymm(7 + i))
-
-// FIXME windows ?
-#define _vertex rcx
-
-void GSSetupPrimCodeGenerator::Generate_AVX2()
-{
- // Technically we just need the delta < 2GB
- m_rip = (size_t)&m_local < 0x80000000 && (size_t)getCurr() < 0x80000000;
-
-#ifdef _WIN64
- sub(rsp, 8 + 2 * 16);
-
- vmovdqa(ptr[rsp + 0], ymm6);
- vmovdqa(ptr[rsp + 16], ymm7);
-#endif
-
- if (!m_rip)
- mov(t0, (size_t)&m_local);
-
- if ((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip)
- {
- mov(rax, (size_t)g_const->m_shift_256b);
-
- for (int i = 0; i < (m_sel.notest ? 2 : 9); i++)
- {
- vmovaps(_m_shift(i), ptr[rax + i * 32]);
- }
- }
- // ymm7 to ymm 15 = m_shift[i]
-
- Depth_AVX2();
-
- Texture_AVX2();
-
- Color_AVX2();
-
-#ifdef _WIN64
- vmovdqa(ymm6, ptr[rsp + 0]);
- vmovdqa(ymm7, ptr[rsp + 16]);
-
- add(rsp, 8 + 2 * 16);
-#endif
-
- ret();
-}
-
-void GSSetupPrimCodeGenerator::Depth_AVX2()
-{
- if (!m_en.z && !m_en.f)
- {
- return;
- }
-
- if (m_sel.prim != GS_SPRITE_CLASS)
- {
- const Ymm& dscan_p = ymm6;
-
- // GSVector4 dp8 = dscan.p * GSVector4::broadcast32(&shift[0]);
-
- vbroadcastf128(dscan_p, ptr[a2 + offsetof(GSVertexSW, p)]);
-
- vmulps(ymm1, dscan_p, _m_shift(0));
-
- if (m_en.z)
- {
- // m_local.d8.p.z = dp8.extract32<2>();
-
- vextractps(_rip_local(d8.p.z), xmm1, 2);
-
- // GSVector8 dz = GSVector8(dscan.p).zzzz();
-
- vshufps(ymm2, dscan_p, dscan_p, _MM_SHUFFLE(2, 2, 2, 2));
-
- for (int i = 0; i < (m_sel.notest ? 1 : 8); i++)
- {
- // m_local.d[i].z = dz * shift[1 + i];
-
- vmulps(ymm0, ymm2, _m_shift(1 + i));
-
- const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].z) + (i * sizeof(GSScanlineLocalData::d[0]));
- vmovaps(_rip_local_v(d[i].z, variableOffset), ymm0);
- }
- }
-
- if (m_en.f)
- {
- // m_local.d8.p.f = GSVector4i(dp8).extract32<3>();
-
- // FIXME no truncate ? why ? vcvttps2dq ?
- //vcvtps2dq(ymm2, ymm1); // let's guess a typo
- vcvttps2dq(ymm2, ymm1);
- vpextrd(_rip_local(d8.p.f), xmm2, 3);
-
- // GSVector8 df = GSVector8(dscan.p).wwww();
-
- vshufps(ymm3, dscan_p, dscan_p, _MM_SHUFFLE(3, 3, 3, 3));
-
- for (int i = 0; i < (m_sel.notest ? 1 : 8); i++)
- {
- // m_local.d[i].f = GSVector8i(df * m_shift[i]).xxzzlh();
-
- vmulps(ymm0, ymm3, _m_shift(1 + i));
- vcvttps2dq(ymm0, ymm0);
-
- vpshuflw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0));
- vpshufhw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0));
-
- const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].f) + (i * sizeof(GSScanlineLocalData::d[0]));
- vmovdqa(_rip_local_v(d[i].f, variableOffset), ymm0);
- }
- }
- }
- else
- {
- // GSVector4 p = vertex[index[1]].p;
-
- mov(_vertex.cvt32(), ptr[a1 + sizeof(uint32) * 1]);
- shl(_vertex.cvt32(), 6); // * sizeof(GSVertexSW)
- add(_vertex, a0);
-
- if (m_en.f)
- {
- // m_local.p.f = GSVector4i(vertex[index[1]].p).extract32<3>();
-
- vmovaps(xmm0, ptr[_vertex + offsetof(GSVertexSW, p)]);
- vcvttps2dq(xmm0, xmm0);
- vpextrd(_rip_local(p.f), xmm0, 3);
- }
-
- if (m_en.z)
- {
- // m_local.p.z = vertex[index[1]].t.u32[3]; // uint32 z is bypassed in t.w
-
- mov(eax, ptr[ecx + offsetof(GSVertexSW, t.w)]);
- mov(_rip_local(p.z), eax);
- }
- }
-}
-
-void GSSetupPrimCodeGenerator::Texture_AVX2()
-{
- if (!m_en.t)
- {
- return;
- }
-
- // GSVector8 dt(dscan.t);
-
- vbroadcastf128(ymm0, ptr[a2 + offsetof(GSVertexSW, t)]);
-
- // GSVector8 dt8 = dt * shift[0];
-
- vmulps(ymm1, ymm0, _m_shift(0));
-
- if (m_sel.fst)
- {
- // m_local.84.stq = GSVector4i(t * 4.0f);
-
- vcvttps2dq(ymm1, ymm1);
-
- vmovdqa(_rip_local(d8.stq), xmm1);
- }
- else
- {
- // m_local.d8.stq = t * 4.0f;
-
- vmovaps(_rip_local(d8.stq), xmm1);
- }
-
- for (int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
- {
- // GSVector8 dstq = dt.xxxx/yyyy/zzzz();
-
- vshufps(ymm1, ymm0, ymm0, (uint8)_MM_SHUFFLE(j, j, j, j));
-
- for (int i = 0; i < (m_sel.notest ? 1 : 8); i++)
- {
- // GSVector8 v = dstq * shift[1 + i];
-
- vmulps(ymm2, ymm1, _m_shift(1 + i));
-
- if (m_sel.fst)
- {
- // m_local.d[i].s/t = GSVector8::cast(GSVector8i(v));
-
- vcvttps2dq(ymm2, ymm2);
-
- const size_t variableOffsetS = offsetof(GSScanlineLocalData, d[0].s) + (i * sizeof(GSScanlineLocalData::d[0]));
- const size_t variableOffsetT = offsetof(GSScanlineLocalData, d[0].t) + (i * sizeof(GSScanlineLocalData::d[0]));
-
- switch (j)
- {
- case 0: vmovdqa(_rip_local_v(d[i].s, variableOffsetS), ymm2); break;
- case 1: vmovdqa(_rip_local_v(d[i].t, variableOffsetT), ymm2); break;
- }
- }
- else
- {
- // m_local.d[i].s/t/q = v;
-
- const size_t variableOffsetS = offsetof(GSScanlineLocalData, d[0].s) + (i * sizeof(GSScanlineLocalData::d[0]));
- const size_t variableOffsetT = offsetof(GSScanlineLocalData, d[0].t) + (i * sizeof(GSScanlineLocalData::d[0]));
- const size_t variableOffsetQ = offsetof(GSScanlineLocalData, d[0].q) + (i * sizeof(GSScanlineLocalData::d[0]));
-
- switch (j)
- {
- case 0: vmovaps(_rip_local_v(d[i].s, variableOffsetS), ymm2); break;
- case 1: vmovaps(_rip_local_v(d[i].t, variableOffsetT), ymm2); break;
- case 2: vmovaps(_rip_local_v(d[i].q, variableOffsetQ), ymm2); break;
- }
- }
- }
- }
-}
-
-void GSSetupPrimCodeGenerator::Color_AVX2()
-{
- if (!m_en.c)
- {
- return;
- }
-
- if (m_sel.iip)
- {
- const Ymm& dscan_c = ymm6;
-
- // GSVector8 dc(dscan.c);
-
- vbroadcastf128(dscan_c, ptr[a2 + offsetof(GSVertexSW, c)]);
-
- // m_local.d8.c = GSVector4i(c * 4.0f).xzyw().ps32();
-
- vmulps(ymm1, dscan_c, ymm3);
- vcvttps2dq(ymm1, ymm1);
- vpshufd(ymm1, ymm1, _MM_SHUFFLE(3, 1, 2, 0));
- vpackssdw(ymm1, ymm1);
- vmovq(_rip_local(d8.c), xmm1);
-
- // GSVector8 dr = dc.xxxx();
- // GSVector8 db = dc.zzzz();
-
- vshufps(ymm2, dscan_c, dscan_c, _MM_SHUFFLE(0, 0, 0, 0));
- vshufps(ymm3, dscan_c, dscan_c, _MM_SHUFFLE(2, 2, 2, 2));
-
- for (int i = 0; i < (m_sel.notest ? 1 : 8); i++)
- {
- // GSVector8i r = GSVector8i(dr * shift[1 + i]).ps32();
-
- vmulps(ymm0, ymm2, _m_shift(1 + i));
- vcvttps2dq(ymm0, ymm0);
- vpackssdw(ymm0, ymm0);
-
- // GSVector4i b = GSVector8i(db * shift[1 + i]).ps32();
-
- vmulps(ymm1, ymm3, _m_shift(1 + i));
- vcvttps2dq(ymm1, ymm1);
- vpackssdw(ymm1, ymm1);
-
- // m_local.d[i].rb = r.upl16(b);
-
- vpunpcklwd(ymm0, ymm1);
-
- const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].rb) + (i * sizeof(GSScanlineLocalData::d[0]));
- vmovdqa(_rip_local_v(d[i].rb, variableOffset), ymm0);
- }
-
- // GSVector8 dg = dc.yyyy();
- // GSVector8 da = dc.wwww();
-
- vshufps(ymm2, dscan_c, dscan_c, _MM_SHUFFLE(1, 1, 1, 1));
- vshufps(ymm3, dscan_c, dscan_c, _MM_SHUFFLE(3, 3, 3, 3));
-
- for (int i = 0; i < (m_sel.notest ? 1 : 8); i++)
- {
- // GSVector8i g = GSVector8i(dg * shift[1 + i]).ps32();
-
- vmulps(ymm0, ymm2, _m_shift(1 + i));
- vcvttps2dq(ymm0, ymm0);
- vpackssdw(ymm0, ymm0);
-
- // GSVector8i a = GSVector8i(da * shift[1 + i]).ps32();
-
- vmulps(ymm1, ymm3, _m_shift(1 + i));
- vcvttps2dq(ymm1, ymm1);
- vpackssdw(ymm1, ymm1);
-
- // m_local.d[i].ga = g.upl16(a);
-
- vpunpcklwd(ymm0, ymm1);
-
- const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].ga) + (i * sizeof(GSScanlineLocalData::d[0]));
- vmovdqa(_rip_local_v(d[i].ga, variableOffset), ymm0);
- }
- }
- else
- {
- // GSVector4i c = GSVector4i(vertex[index[last].c);
-
- int last = 0;
-
- switch (m_sel.prim)
- {
- case GS_POINT_CLASS: last = 0; break;
- case GS_LINE_CLASS: last = 1; break;
- case GS_TRIANGLE_CLASS: last = 2; break;
- case GS_SPRITE_CLASS: last = 1; break;
- }
-
- if (!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth()
- {
- mov(_vertex.cvt32(), ptr[a1 + sizeof(uint32) * last]);
- shl(_vertex.cvt32(), 6); // * sizeof(GSVertexSW)
- add(_vertex, a0);
- }
-
- vbroadcasti128(ymm0, ptr[_vertex + offsetof(GSVertexSW, c)]);
- vcvttps2dq(ymm0, ymm0);
-
- // c = c.upl16(c.zwxy());
-
- vpshufd(ymm1, ymm0, _MM_SHUFFLE(1, 0, 3, 2));
- vpunpcklwd(ymm0, ymm1);
-
- // if(!tme) c = c.srl16(7);
-
- if (m_sel.tfx == TFX_NONE)
- {
- vpsrlw(ymm0, 7);
- }
-
- // m_local.c.rb = c.xxxx();
- // m_local.c.ga = c.zzzz();
-
- vpshufd(ymm1, ymm0, _MM_SHUFFLE(0, 0, 0, 0));
- vpshufd(ymm2, ymm0, _MM_SHUFFLE(2, 2, 2, 2));
-
- vmovdqa(_rip_local(c.rb), ymm1);
- vmovdqa(_rip_local(c.ga), ymm2);
- }
-}
-
-#endif
diff --git a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.cpp b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.cpp
deleted file mode 100644
index 3a12b46106..0000000000
--- a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.cpp
+++ /dev/null
@@ -1,374 +0,0 @@
-/* PCSX2 - PS2 Emulator for PCs
- * Copyright (C) 2002-2021 PCSX2 Dev Team
- *
- * PCSX2 is free software: you can redistribute it and/or modify it under the terms
- * of the GNU Lesser General Public License as published by the Free Software Found-
- * ation, either version 3 of the License, or (at your option) any later version.
- *
- * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
- * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along with PCSX2.
- * If not, see .
- */
-
-#include "PrecompiledHeader.h"
-#include "GSSetupPrimCodeGenerator.h"
-#include "GSVertexSW.h"
-#include "GS/GS_codegen.h"
-
-#if _M_SSE < 0x501 && (defined(_M_AMD64) || defined(_WIN64))
-
-void GSSetupPrimCodeGenerator::Generate_SSE()
-{
-#ifdef _WIN64
- sub(rsp, 8 + 2 * 16);
-
- vmovdqa(ptr[rsp + 0], xmm6);
- vmovdqa(ptr[rsp + 16], xmm7);
-#endif
-
- mov(t0, (size_t)&m_local);
-
- if ((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip)
- {
- mov(rax, (size_t)g_const->m_shift_128b[0]);
-
- for (int i = 0; i < (m_sel.notest ? 2 : 5); i++)
- {
- movaps(Xmm(3 + i), ptr[rax + i * 16]);
- }
- }
-
- Depth_SSE();
-
- Texture_SSE();
-
- Color_SSE();
-
-#ifdef _WIN64
- vmovdqa(xmm6, ptr[rsp + 0]);
- vmovdqa(xmm7, ptr[rsp + 16]);
-
- add(rsp, 8 + 2 * 16);
-#endif
-
- ret();
-}
-
-void GSSetupPrimCodeGenerator::Depth_SSE()
-{
- if (!m_en.z && !m_en.f)
- {
- return;
- }
-
- if (m_sel.prim != GS_SPRITE_CLASS)
- {
- // GSVector4 p = dscan.p;
-
- movaps(xmm0, ptr[a2 + offsetof(GSVertexSW, p)]);
-
- if (m_en.f)
- {
- // GSVector4 df = p.wwww();
-
- movaps(xmm1, xmm0);
- shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
-
- // m_local.d4.f = GSVector4i(df * 4.0f).xxzzlh();
-
- movaps(xmm2, xmm1);
- mulps(xmm2, xmm3);
- cvttps2dq(xmm2, xmm2);
- pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
- pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
- movdqa(ptr[t0 + offsetof(GSScanlineLocalData, d4.f)], xmm2);
-
- for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
- {
- // m_local.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh();
-
- movaps(xmm2, xmm1);
- mulps(xmm2, Xmm(4 + i));
- cvttps2dq(xmm2, xmm2);
- pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
- pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
-
- const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].f) + (i * sizeof(GSScanlineLocalData::d[0]));
- movdqa(ptr[t0 + variableOffset], xmm2);
- }
- }
-
- if (m_en.z)
- {
- // GSVector4 dz = p.zzzz();
-
- shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
-
- // m_local.d4.z = dz * 4.0f;
-
- movaps(xmm1, xmm0);
- mulps(xmm1, xmm3);
- movdqa(ptr[t0 + offsetof(GSScanlineLocalData, d4.z)], xmm1);
-
- for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
- {
- // m_local.d[i].z = dz * m_shift[i];
-
- movaps(xmm1, xmm0);
- mulps(xmm1, Xmm(4 + i));
-
- const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].z) + (i * sizeof(GSScanlineLocalData::d[0]));
- movdqa(ptr[t0 + variableOffset], xmm1);
- }
- }
- }
- else
- {
- // GSVector4 p = vertex[index[1]].p;
-
- mov(eax, ptr[a1 + sizeof(uint32) * 1]);
- shl(eax, 6); // * sizeof(GSVertexSW)
- add(rax, a0);
-
- movaps(xmm0, ptr[rax + offsetof(GSVertexSW, p)]);
-
- if (m_en.f)
- {
- // m_local.p.f = GSVector4i(p).zzzzh().zzzz();
-
- cvttps2dq(xmm1, xmm0);
- pshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
- pshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
- movdqa(ptr[t0 + offsetof(GSScanlineLocalData, p.f)], xmm1);
- }
-
- if (m_en.z)
- {
- // uint32 z is bypassed in t.w
-
- vmovdqa(xmm0, ptr[rax + offsetof(GSVertexSW, t)]);
- vpshufd(xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
- vmovdqa(ptr[t0 + offsetof(GSScanlineLocalData, p.z)], xmm0);
- }
- }
-}
-
-void GSSetupPrimCodeGenerator::Texture_SSE()
-{
- if (!m_en.t)
- {
- return;
- }
-
- // GSVector4 t = dscan.t;
-
- movaps(xmm0, ptr[a2 + offsetof(GSVertexSW, t)]);
-
- movaps(xmm1, xmm0);
- mulps(xmm1, xmm3);
-
- if (m_sel.fst)
- {
- // m_local.d4.stq = GSVector4i(t * 4.0f);
-
- cvttps2dq(xmm1, xmm1);
-
- movdqa(ptr[t0 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
- }
- else
- {
- // m_local.d4.stq = t * 4.0f;
-
- movaps(ptr[t0 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
- }
-
- for (int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
- {
- // GSVector4 ds = t.xxxx();
- // GSVector4 dt = t.yyyy();
- // GSVector4 dq = t.zzzz();
-
- movaps(xmm1, xmm0);
- shufps(xmm1, xmm1, (uint8)_MM_SHUFFLE(j, j, j, j));
-
- for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
- {
- // GSVector4 v = ds/dt * m_shift[i];
-
- movaps(xmm2, xmm1);
- mulps(xmm2, Xmm(4 + i));
-
- if (m_sel.fst)
- {
- // m_local.d[i].s/t = GSVector4i(v);
-
- cvttps2dq(xmm2, xmm2);
-
- const size_t variableOffsetS = offsetof(GSScanlineLocalData, d[0].s) + (i * sizeof(GSScanlineLocalData::d[0]));
- const size_t variableOffsetT = offsetof(GSScanlineLocalData, d[0].t) + (i * sizeof(GSScanlineLocalData::d[0]));
-
- switch (j)
- {
- case 0: movdqa(ptr[t0 + variableOffsetS], xmm2); break;
- case 1: movdqa(ptr[t0 + variableOffsetT], xmm2); break;
- }
- }
- else
- {
- // m_local.d[i].s/t/q = v;
-
- const size_t variableOffsetS = offsetof(GSScanlineLocalData, d[0].s) + (i * sizeof(GSScanlineLocalData::d[0]));
- const size_t variableOffsetT = offsetof(GSScanlineLocalData, d[0].t) + (i * sizeof(GSScanlineLocalData::d[0]));
- const size_t variableOffsetQ = offsetof(GSScanlineLocalData, d[0].q) + (i * sizeof(GSScanlineLocalData::d[0]));
-
- switch (j)
- {
- case 0: movaps(ptr[t0 + variableOffsetS], xmm2); break;
- case 1: movaps(ptr[t0 + variableOffsetT], xmm2); break;
- case 2: movaps(ptr[t0 + variableOffsetQ], xmm2); break;
- }
- }
- }
- }
-}
-
-void GSSetupPrimCodeGenerator::Color_SSE()
-{
- if (!m_en.c)
- {
- return;
- }
-
- if (m_sel.iip)
- {
- // GSVector4 c = dscan.c;
-
- movaps(xmm0, ptr[a2 + offsetof(GSVertexSW, c)]);
- movaps(xmm1, xmm0);
-
- // m_local.d4.c = GSVector4i(c * 4.0f).xzyw().ps32();
-
- movaps(xmm2, xmm0);
- mulps(xmm2, xmm3);
- cvttps2dq(xmm2, xmm2);
- pshufd(xmm2, xmm2, _MM_SHUFFLE(3, 1, 2, 0));
- packssdw(xmm2, xmm2);
- movdqa(ptr[t0 + offsetof(GSScanlineLocalData, d4.c)], xmm2);
-
- // xmm3 is not needed anymore
-
- // GSVector4 dr = c.xxxx();
- // GSVector4 db = c.zzzz();
-
- shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
- shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
-
- for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
- {
- // GSVector4i r = GSVector4i(dr * m_shift[i]).ps32();
-
- movaps(xmm2, xmm0);
- mulps(xmm2, Xmm(4 + i));
- cvttps2dq(xmm2, xmm2);
- packssdw(xmm2, xmm2);
-
- // GSVector4i b = GSVector4i(db * m_shift[i]).ps32();
-
- movaps(xmm3, xmm1);
- mulps(xmm3, Xmm(4 + i));
- cvttps2dq(xmm3, xmm3);
- packssdw(xmm3, xmm3);
-
- // m_local.d[i].rb = r.upl16(b);
-
- punpcklwd(xmm2, xmm3);
-
- const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].rb) + (i * sizeof(GSScanlineLocalData::d[0]));
- movdqa(ptr[t0 + variableOffset], xmm2);
- }
-
- // GSVector4 c = dscan.c;
-
- movaps(xmm0, ptr[a2 + offsetof(GSVertexSW, c)]); // not enough regs, have to reload it
- movaps(xmm1, xmm0);
-
- // GSVector4 dg = c.yyyy();
- // GSVector4 da = c.wwww();
-
- shufps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
- shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
-
- for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
- {
- // GSVector4i g = GSVector4i(dg * m_shift[i]).ps32();
-
- movaps(xmm2, xmm0);
- mulps(xmm2, Xmm(4 + i));
- cvttps2dq(xmm2, xmm2);
- packssdw(xmm2, xmm2);
-
- // GSVector4i a = GSVector4i(da * m_shift[i]).ps32();
-
- movaps(xmm3, xmm1);
- mulps(xmm3, Xmm(4 + i));
- cvttps2dq(xmm3, xmm3);
- packssdw(xmm3, xmm3);
-
- // m_local.d[i].ga = g.upl16(a);
-
- punpcklwd(xmm2, xmm3);
-
- const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].ga) + (i * sizeof(GSScanlineLocalData::d[0]));
- movdqa(ptr[t0 + variableOffset], xmm2);
- }
- }
- else
- {
- // GSVector4i c = GSVector4i(vertex[index[last].c);
-
- int last = 0;
-
- switch (m_sel.prim)
- {
- case GS_POINT_CLASS: last = 0; break;
- case GS_LINE_CLASS: last = 1; break;
- case GS_TRIANGLE_CLASS: last = 2; break;
- case GS_SPRITE_CLASS: last = 1; break;
- }
-
- if (!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth()
- {
- mov(eax, ptr[a1 + sizeof(uint32) * last]);
- shl(eax, 6); // * sizeof(GSVertexSW)
- add(rax, a0);
- }
-
- cvttps2dq(xmm0, ptr[rax + offsetof(GSVertexSW, c)]);
-
- // c = c.upl16(c.zwxy());
-
- pshufd(xmm1, xmm0, _MM_SHUFFLE(1, 0, 3, 2));
- punpcklwd(xmm0, xmm1);
-
- // if(!tme) c = c.srl16(7);
-
- if (m_sel.tfx == TFX_NONE)
- {
- psrlw(xmm0, 7);
- }
-
- // m_local.c.rb = c.xxxx();
- // m_local.c.ga = c.zzzz();
-
- pshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
- pshufd(xmm2, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
-
- movdqa(ptr[t0 + offsetof(GSScanlineLocalData, c.rb)], xmm1);
- movdqa(ptr[t0 + offsetof(GSScanlineLocalData, c.ga)], xmm2);
- }
-}
-
-#endif
diff --git a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.avx.cpp b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.avx.cpp
deleted file mode 100644
index 555abda7a7..0000000000
--- a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.avx.cpp
+++ /dev/null
@@ -1,335 +0,0 @@
-/* PCSX2 - PS2 Emulator for PCs
- * Copyright (C) 2002-2021 PCSX2 Dev Team
- *
- * PCSX2 is free software: you can redistribute it and/or modify it under the terms
- * of the GNU Lesser General Public License as published by the Free Software Found-
- * ation, either version 3 of the License, or (at your option) any later version.
- *
- * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
- * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along with PCSX2.
- * If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "PrecompiledHeader.h"
-#include "GSSetupPrimCodeGenerator.h"
-#include "GSVertexSW.h"
-#include "GS/GS_codegen.h"
-
-#if _M_SSE < 0x501 && !(defined(_M_AMD64) || defined(_WIN64))
-
-static const int _args = 0;
-static const int _vertex = _args + 4;
-static const int _index = _args + 8;
-static const int _dscan = _args + 12;
-
-void GSSetupPrimCodeGenerator::Generate_AVX()
-{
- if ((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip)
- {
- mov(edx, dword[esp + _dscan]);
-
- for (int i = 0; i < (m_sel.notest ? 2 : 5); i++)
- {
- vmovaps(Xmm(3 + i), ptr[g_const->m_shift_128b[i]]);
- }
- }
-
- Depth_AVX();
-
- Texture_AVX();
-
- Color_AVX();
-
- ret();
-}
-
-void GSSetupPrimCodeGenerator::Depth_AVX()
-{
- if (!m_en.z && !m_en.f)
- {
- return;
- }
-
- if (m_sel.prim != GS_SPRITE_CLASS)
- {
- // GSVector4 p = dscan.p;
-
- vmovaps(xmm0, ptr[edx + offsetof(GSVertexSW, p)]);
-
- if (m_en.f)
- {
- // GSVector4 df = p.wwww();
-
- vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
-
- // m_local.d4.f = GSVector4i(df * 4.0f).xxzzlh();
-
- vmulps(xmm2, xmm1, xmm3);
- vcvttps2dq(xmm2, xmm2);
- vpshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
- vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
- vmovdqa(ptr[&m_local.d4.f], xmm2);
-
- for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
- {
- // m_local.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh();
-
- vmulps(xmm2, xmm1, Xmm(4 + i));
- vcvttps2dq(xmm2, xmm2);
- vpshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
- vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
- vmovdqa(ptr[&m_local.d[i].f], xmm2);
- }
- }
-
- if (m_en.z)
- {
- // GSVector4 dz = p.zzzz();
-
- vshufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
-
- // m_local.d4.z = dz * 4.0f;
-
- vmulps(xmm1, xmm0, xmm3);
- vmovdqa(ptr[&m_local.d4.z], xmm1);
-
- for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
- {
- // m_local.d[i].z = dz * m_shift[i];
-
- vmulps(xmm1, xmm0, Xmm(4 + i));
- vmovdqa(ptr[&m_local.d[i].z], xmm1);
- }
- }
- }
- else
- {
- // GSVector4 p = vertex[index[1]].p;
-
- mov(ecx, ptr[esp + _index]);
- mov(ecx, ptr[ecx + sizeof(uint32) * 1]);
- shl(ecx, 6); // * sizeof(GSVertexSW)
- add(ecx, ptr[esp + _vertex]);
-
- vmovaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]);
-
- if (m_en.f)
- {
- // m_local.p.f = GSVector4i(p).zzzzh().zzzz();
-
- vcvttps2dq(xmm1, xmm0);
- vpshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
- vpshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
- vmovdqa(ptr[&m_local.p.f], xmm1);
- }
-
- if (m_en.z)
- {
- // uint32 z is bypassed in t.w
-
- vmovdqa(xmm0, ptr[ecx + offsetof(GSVertexSW, t)]);
- vpshufd(xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
- vmovdqa(ptr[&m_local.p.z], xmm0);
- }
- }
-}
-
-void GSSetupPrimCodeGenerator::Texture_AVX()
-{
- if (!m_en.t)
- {
- return;
- }
-
- // GSVector4 t = dscan.t;
-
- vmovaps(xmm0, ptr[edx + offsetof(GSVertexSW, t)]);
-
- vmulps(xmm1, xmm0, xmm3);
-
- if (m_sel.fst)
- {
- // m_local.d4.stq = GSVector4i(t * 4.0f);
-
- vcvttps2dq(xmm1, xmm1);
-
- vmovdqa(ptr[&m_local.d4.stq], xmm1);
- }
- else
- {
- // m_local.d4.stq = t * 4.0f;
-
- vmovaps(ptr[&m_local.d4.stq], xmm1);
- }
-
- for (int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
- {
- // GSVector4 ds = t.xxxx();
- // GSVector4 dt = t.yyyy();
- // GSVector4 dq = t.zzzz();
-
- vshufps(xmm1, xmm0, xmm0, (uint8)_MM_SHUFFLE(j, j, j, j));
-
- for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
- {
- // GSVector4 v = ds/dt * m_shift[i];
-
- vmulps(xmm2, xmm1, Xmm(4 + i));
-
- if (m_sel.fst)
- {
- // m_local.d[i].s/t = GSVector4i(v);
-
- vcvttps2dq(xmm2, xmm2);
-
- switch (j)
- {
- case 0: vmovdqa(ptr[&m_local.d[i].s], xmm2); break;
- case 1: vmovdqa(ptr[&m_local.d[i].t], xmm2); break;
- }
- }
- else
- {
- // m_local.d[i].s/t/q = v;
-
- switch (j)
- {
- case 0: vmovaps(ptr[&m_local.d[i].s], xmm2); break;
- case 1: vmovaps(ptr[&m_local.d[i].t], xmm2); break;
- case 2: vmovaps(ptr[&m_local.d[i].q], xmm2); break;
- }
- }
- }
- }
-}
-
-void GSSetupPrimCodeGenerator::Color_AVX()
-{
- if (!m_en.c)
- {
- return;
- }
-
- if (m_sel.iip)
- {
- // GSVector4 c = dscan.c;
-
- vmovaps(xmm0, ptr[edx + offsetof(GSVertexSW, c)]);
-
- // m_local.d4.c = GSVector4i(c * 4.0f).xzyw().ps32();
-
- vmulps(xmm1, xmm0, xmm3);
- vcvttps2dq(xmm1, xmm1);
- vpshufd(xmm1, xmm1, _MM_SHUFFLE(3, 1, 2, 0));
- vpackssdw(xmm1, xmm1);
- vmovdqa(ptr[&m_local.d4.c], xmm1);
-
- // xmm3 is not needed anymore
-
- // GSVector4 dr = c.xxxx();
- // GSVector4 db = c.zzzz();
-
- vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
- vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
-
- for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
- {
- // GSVector4i r = GSVector4i(dr * m_shift[i]).ps32();
-
- vmulps(xmm0, xmm2, Xmm(4 + i));
- vcvttps2dq(xmm0, xmm0);
- vpackssdw(xmm0, xmm0);
-
- // GSVector4i b = GSVector4i(db * m_shift[i]).ps32();
-
- vmulps(xmm1, xmm3, Xmm(4 + i));
- vcvttps2dq(xmm1, xmm1);
- vpackssdw(xmm1, xmm1);
-
- // m_local.d[i].rb = r.upl16(b);
-
- vpunpcklwd(xmm0, xmm1);
- vmovdqa(ptr[&m_local.d[i].rb], xmm0);
- }
-
- // GSVector4 c = dscan.c;
-
- vmovaps(xmm0, ptr[edx + offsetof(GSVertexSW, c)]); // not enough regs, have to reload it
-
- // GSVector4 dg = c.yyyy();
- // GSVector4 da = c.wwww();
-
- vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
- vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
-
- for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
- {
- // GSVector4i g = GSVector4i(dg * m_shift[i]).ps32();
-
- vmulps(xmm0, xmm2, Xmm(4 + i));
- vcvttps2dq(xmm0, xmm0);
- vpackssdw(xmm0, xmm0);
-
- // GSVector4i a = GSVector4i(da * m_shift[i]).ps32();
-
- vmulps(xmm1, xmm3, Xmm(4 + i));
- vcvttps2dq(xmm1, xmm1);
- vpackssdw(xmm1, xmm1);
-
- // m_local.d[i].ga = g.upl16(a);
-
- vpunpcklwd(xmm0, xmm1);
- vmovdqa(ptr[&m_local.d[i].ga], xmm0);
- }
- }
- else
- {
- // GSVector4i c = GSVector4i(vertex[index[last].c);
-
- int last = 0;
-
- switch (m_sel.prim)
- {
- case GS_POINT_CLASS: last = 0; break;
- case GS_LINE_CLASS: last = 1; break;
- case GS_TRIANGLE_CLASS: last = 2; break;
- case GS_SPRITE_CLASS: last = 1; break;
- }
-
- if (!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth()
- {
- mov(ecx, ptr[esp + _index]);
- mov(ecx, ptr[ecx + sizeof(uint32) * last]);
- shl(ecx, 6); // * sizeof(GSVertexSW)
- add(ecx, ptr[esp + _vertex]);
- }
-
- vcvttps2dq(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]);
-
- // c = c.upl16(c.zwxy());
-
- vpshufd(xmm1, xmm0, _MM_SHUFFLE(1, 0, 3, 2));
- vpunpcklwd(xmm0, xmm1);
-
- // if(!tme) c = c.srl16(7);
-
- if (m_sel.tfx == TFX_NONE)
- {
- vpsrlw(xmm0, 7);
- }
-
- // m_local.c.rb = c.xxxx();
- // m_local.c.ga = c.zzzz();
-
- vpshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
- vpshufd(xmm2, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
-
- vmovdqa(ptr[&m_local.c.rb], xmm1);
- vmovdqa(ptr[&m_local.c.ga], xmm2);
- }
-}
-
-#endif
diff --git a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.avx2.cpp b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.avx2.cpp
deleted file mode 100644
index a3f9ee653d..0000000000
--- a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.avx2.cpp
+++ /dev/null
@@ -1,360 +0,0 @@
-/* PCSX2 - PS2 Emulator for PCs
- * Copyright (C) 2002-2021 PCSX2 Dev Team
- *
- * PCSX2 is free software: you can redistribute it and/or modify it under the terms
- * of the GNU Lesser General Public License as published by the Free Software Found-
- * ation, either version 3 of the License, or (at your option) any later version.
- *
- * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
- * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along with PCSX2.
- * If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "PrecompiledHeader.h"
-#include "GSSetupPrimCodeGenerator.h"
-#include "GSVertexSW.h"
-#include "GS/GS_codegen.h"
-
-#if _M_SSE >= 0x501 && !(defined(_M_AMD64) || defined(_WIN64))
-
-static const int _args = 0;
-static const int _vertex = _args + 4;
-static const int _index = _args + 8;
-static const int _dscan = _args + 12;
-
-void GSSetupPrimCodeGenerator::Generate_AVX2()
-{
- if ((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip)
- {
- mov(edx, dword[esp + _dscan]);
-
- for (int i = 0; i < (m_sel.notest ? 2 : 5); i++)
- {
- vmovaps(Ymm(3 + i), ptr[g_const->m_shift_256b[i]]);
- }
- }
-
- Depth_AVX2();
-
- Texture_AVX2();
-
- Color_AVX2();
-
- ret();
-}
-
-void GSSetupPrimCodeGenerator::Depth_AVX2()
-{
- if (!m_en.z && !m_en.f)
- {
- return;
- }
-
- if (m_sel.prim != GS_SPRITE_CLASS)
- {
- // GSVector4 dp8 = dscan.p * GSVector4::broadcast32(&shift[0]);
-
- vbroadcastf128(ymm0, ptr[edx + offsetof(GSVertexSW, p)]);
-
- vmulps(ymm1, ymm0, ymm3);
-
- if (m_en.z)
- {
- // m_local.d8.p.z = dp8.extract32<2>();
-
- vextractps(ptr[&m_local.d8.p.z], xmm1, 2);
- }
-
- if (m_en.f)
- {
- // m_local.d8.p.f = GSVector4i(dp8).extract32<3>();
-
- vcvtps2dq(ymm2, ymm1);
- vpextrd(ptr[&m_local.d8.p.f], xmm2, 3);
- }
-
- if (m_en.z)
- {
- // GSVector8 dz = GSVector8(dscan.p).zzzz();
-
- vshufps(ymm2, ymm0, ymm0, _MM_SHUFFLE(2, 2, 2, 2));
- }
-
- if (m_en.f)
- {
- // GSVector8 df = GSVector8(dscan.p).wwww();
-
- vshufps(ymm1, ymm0, ymm0, _MM_SHUFFLE(3, 3, 3, 3));
- }
-
- for (int i = 0; i < (m_sel.notest ? 1 : 8); i++)
- {
- if (m_en.z)
- {
- // m_local.d[i].z = dz * shift[1 + i];
-
- if (i < 4)
- vmulps(ymm0, ymm2, Ymm(4 + i));
- else
- vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]);
- vmovaps(ptr[&m_local.d[i].z], ymm0);
- }
-
- if (m_en.f)
- {
- // m_local.d[i].f = GSVector8i(df * m_shift[i]).xxzzlh();
-
- if (i < 4)
- vmulps(ymm0, ymm1, Ymm(4 + i));
- else
- vmulps(ymm0, ymm1, ptr[g_const->m_shift_256b[i + 1]]);
- vcvttps2dq(ymm0, ymm0);
- vpshuflw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0));
- vpshufhw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0));
- vmovdqa(ptr[&m_local.d[i].f], ymm0);
- }
- }
- }
- else
- {
- // GSVector4 p = vertex[index[1]].p;
-
- mov(ecx, ptr[esp + _index]);
- mov(ecx, ptr[ecx + sizeof(uint32) * 1]);
- shl(ecx, 6); // * sizeof(GSVertexSW)
- add(ecx, ptr[esp + _vertex]);
-
- if (m_en.f)
- {
- // m_local.p.f = GSVector4i(vertex[index[1]].p).extract32<3>();
-
- vmovaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]);
- vcvttps2dq(xmm0, xmm0);
- vpextrd(ptr[&m_local.p.f], xmm0, 3);
- }
-
- if (m_en.z)
- {
- // m_local.p.z = vertex[index[1]].t.u32[3]; // uint32 z is bypassed in t.w
-
- mov(eax, ptr[ecx + offsetof(GSVertexSW, t.w)]);
- mov(ptr[&m_local.p.z], eax);
- }
- }
-}
-
-void GSSetupPrimCodeGenerator::Texture_AVX2()
-{
- if (!m_en.t)
- {
- return;
- }
-
- // GSVector8 dt(dscan.t);
-
- vbroadcastf128(ymm0, ptr[edx + offsetof(GSVertexSW, t)]);
-
- // GSVector8 dt8 = dt * shift[0];
-
- vmulps(ymm1, ymm0, ymm3);
-
- if (m_sel.fst)
- {
- // m_local.d8.stq = GSVector8::cast(GSVector8i(dt8));
-
- vcvttps2dq(ymm1, ymm1);
-
- vmovdqa(ptr[&m_local.d8.stq], xmm1);
- }
- else
- {
- // m_local.d8.stq = dt8;
-
- vmovaps(ptr[&m_local.d8.stq], xmm1);
- }
-
- for (int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
- {
- // GSVector8 dstq = dt.xxxx/yyyy/zzzz();
-
- vshufps(ymm1, ymm0, ymm0, (uint8)_MM_SHUFFLE(j, j, j, j));
-
- for (int i = 0; i < (m_sel.notest ? 1 : 8); i++)
- {
- // GSVector8 v = dstq * shift[1 + i];
-
- if (i < 4)
- vmulps(ymm2, ymm1, Ymm(4 + i));
- else
- vmulps(ymm2, ymm1, ptr[g_const->m_shift_256b[i + 1]]);
-
- if (m_sel.fst)
- {
- // m_local.d[i].s/t = GSVector8::cast(GSVector8i(v));
-
- vcvttps2dq(ymm2, ymm2);
-
- switch (j)
- {
- case 0: vmovdqa(ptr[&m_local.d[i].s], ymm2); break;
- case 1: vmovdqa(ptr[&m_local.d[i].t], ymm2); break;
- }
- }
- else
- {
- // m_local.d[i].s/t/q = v;
-
- switch (j)
- {
- case 0: vmovaps(ptr[&m_local.d[i].s], ymm2); break;
- case 1: vmovaps(ptr[&m_local.d[i].t], ymm2); break;
- case 2: vmovaps(ptr[&m_local.d[i].q], ymm2); break;
- }
- }
- }
- }
-}
-
-void GSSetupPrimCodeGenerator::Color_AVX2()
-{
- if (!m_en.c)
- {
- return;
- }
-
- if (m_sel.iip)
- {
- // GSVector8 dc(dscan.c);
-
- vbroadcastf128(ymm0, ptr[edx + offsetof(GSVertexSW, c)]);
-
- // m_local.d8.c = GSVector8i(dc * shift[0]).xzyw().ps32();
-
- vmulps(ymm1, ymm0, ymm3);
- vcvttps2dq(ymm1, ymm1);
- vpshufd(ymm1, ymm1, _MM_SHUFFLE(3, 1, 2, 0));
- vpackssdw(ymm1, ymm1);
- vmovq(ptr[&m_local.d8.c], xmm1);
-
- // ymm3 is not needed anymore
-
- // GSVector8 dr = dc.xxxx();
- // GSVector8 db = dc.zzzz();
-
- vshufps(ymm2, ymm0, ymm0, _MM_SHUFFLE(0, 0, 0, 0));
- vshufps(ymm3, ymm0, ymm0, _MM_SHUFFLE(2, 2, 2, 2));
-
- for (int i = 0; i < (m_sel.notest ? 1 : 8); i++)
- {
- // GSVector8i r = GSVector8i(dr * shift[1 + i]).ps32();
-
- if (i < 4)
- vmulps(ymm0, ymm2, Ymm(4 + i));
- else
- vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]);
- vcvttps2dq(ymm0, ymm0);
- vpackssdw(ymm0, ymm0);
-
- // GSVector4i b = GSVector8i(db * shift[1 + i]).ps32();
-
- if (i < 4)
- vmulps(ymm1, ymm3, Ymm(4 + i));
- else
- vmulps(ymm1, ymm3, ptr[g_const->m_shift_256b[i + 1]]);
- vcvttps2dq(ymm1, ymm1);
- vpackssdw(ymm1, ymm1);
-
- // m_local.d[i].rb = r.upl16(b);
-
- vpunpcklwd(ymm0, ymm1);
- vmovdqa(ptr[&m_local.d[i].rb], ymm0);
- }
-
- // GSVector8 dc(dscan.c);
-
- vbroadcastf128(ymm0, ptr[edx + offsetof(GSVertexSW, c)]); // not enough regs, have to reload it
-
- // GSVector8 dg = dc.yyyy();
- // GSVector8 da = dc.wwww();
-
- vshufps(ymm2, ymm0, ymm0, _MM_SHUFFLE(1, 1, 1, 1));
- vshufps(ymm3, ymm0, ymm0, _MM_SHUFFLE(3, 3, 3, 3));
-
- for (int i = 0; i < (m_sel.notest ? 1 : 8); i++)
- {
- // GSVector8i g = GSVector8i(dg * shift[1 + i]).ps32();
-
- if (i < 4)
- vmulps(ymm0, ymm2, Ymm(4 + i));
- else
- vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]);
- vcvttps2dq(ymm0, ymm0);
- vpackssdw(ymm0, ymm0);
-
- // GSVector8i a = GSVector8i(da * shift[1 + i]).ps32();
-
- if (i < 4)
- vmulps(ymm1, ymm3, Ymm(4 + i));
- else
- vmulps(ymm1, ymm3, ptr[g_const->m_shift_256b[i + 1]]);
- vcvttps2dq(ymm1, ymm1);
- vpackssdw(ymm1, ymm1);
-
- // m_local.d[i].ga = g.upl16(a);
-
- vpunpcklwd(ymm0, ymm1);
- vmovdqa(ptr[&m_local.d[i].ga], ymm0);
- }
- }
- else
- {
- // GSVector8i c = GSVector8i(GSVector8(vertex[index[last]].c));
-
- int last = 0;
-
- switch (m_sel.prim)
- {
- case GS_POINT_CLASS: last = 0; break;
- case GS_LINE_CLASS: last = 1; break;
- case GS_TRIANGLE_CLASS: last = 2; break;
- case GS_SPRITE_CLASS: last = 1; break;
- }
-
- if (!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth()
- {
- mov(ecx, ptr[esp + _index]);
- mov(ecx, ptr[ecx + sizeof(uint32) * last]);
- shl(ecx, 6); // * sizeof(GSVertexSW)
- add(ecx, ptr[esp + _vertex]);
- }
-
- vbroadcasti128(ymm0, ptr[ecx + offsetof(GSVertexSW, c)]);
- vcvttps2dq(ymm0, ymm0);
-
- // c = c.upl16(c.zwxy());
-
- vpshufd(ymm1, ymm0, _MM_SHUFFLE(1, 0, 3, 2));
- vpunpcklwd(ymm0, ymm1);
-
- // if(!tme) c = c.srl16(7);
-
- if (m_sel.tfx == TFX_NONE)
- {
- vpsrlw(ymm0, 7);
- }
-
- // m_local.c.rb = c.xxxx();
- // m_local.c.ga = c.zzzz();
-
- vpshufd(ymm1, ymm0, _MM_SHUFFLE(0, 0, 0, 0));
- vpshufd(ymm2, ymm0, _MM_SHUFFLE(2, 2, 2, 2));
-
- vmovdqa(ptr[&m_local.c.rb], ymm1);
- vmovdqa(ptr[&m_local.c.ga], ymm2);
- }
-}
-
-#endif
diff --git a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.cpp b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.cpp
deleted file mode 100644
index 0b70d8ffc7..0000000000
--- a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.cpp
+++ /dev/null
@@ -1,350 +0,0 @@
-/* PCSX2 - PS2 Emulator for PCs
- * Copyright (C) 2002-2021 PCSX2 Dev Team
- *
- * PCSX2 is free software: you can redistribute it and/or modify it under the terms
- * of the GNU Lesser General Public License as published by the Free Software Found-
- * ation, either version 3 of the License, or (at your option) any later version.
- *
- * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
- * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along with PCSX2.
- * If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "PrecompiledHeader.h"
-#include "GSSetupPrimCodeGenerator.h"
-#include "GSVertexSW.h"
-#include "GS/GS_codegen.h"
-
-#if _M_SSE < 0x501 && !(defined(_M_AMD64) || defined(_WIN64))
-
-static const int _args = 0;
-static const int _vertex = _args + 4;
-static const int _index = _args + 8;
-static const int _dscan = _args + 12;
-
-void GSSetupPrimCodeGenerator::Generate_SSE()
-{
- if ((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip)
- {
- mov(edx, dword[esp + _dscan]);
-
- for (int i = 0; i < (m_sel.notest ? 2 : 5); i++)
- {
- movaps(Xmm(3 + i), ptr[g_const->m_shift_128b[i]]);
- }
- }
-
- Depth_SSE();
-
- Texture_SSE();
-
- Color_SSE();
-
- ret();
-}
-
-void GSSetupPrimCodeGenerator::Depth_SSE()
-{
- if (!m_en.z && !m_en.f)
- {
- return;
- }
-
- if (m_sel.prim != GS_SPRITE_CLASS)
- {
- // GSVector4 p = dscan.p;
-
- movaps(xmm0, ptr[edx + offsetof(GSVertexSW, p)]);
-
- if (m_en.f)
- {
- // GSVector4 df = p.wwww();
-
- movaps(xmm1, xmm0);
- shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
-
- // m_local.d4.f = GSVector4i(df * 4.0f).xxzzlh();
-
- movaps(xmm2, xmm1);
- mulps(xmm2, xmm3);
- cvttps2dq(xmm2, xmm2);
- pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
- pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
- movdqa(ptr[&m_local.d4.f], xmm2);
-
- for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
- {
- // m_local.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh();
-
- movaps(xmm2, xmm1);
- mulps(xmm2, Xmm(4 + i));
- cvttps2dq(xmm2, xmm2);
- pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
- pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
- movdqa(ptr[&m_local.d[i].f], xmm2);
- }
- }
-
- if (m_en.z)
- {
- // GSVector4 dz = p.zzzz();
-
- shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
-
- // m_local.d4.z = dz * 4.0f;
-
- movaps(xmm1, xmm0);
- mulps(xmm1, xmm3);
- movdqa(ptr[&m_local.d4.z], xmm1);
-
- for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
- {
- // m_local.d[i].z = dz * m_shift[i];
-
- movaps(xmm1, xmm0);
- mulps(xmm1, Xmm(4 + i));
- movdqa(ptr[&m_local.d[i].z], xmm1);
- }
- }
- }
- else
- {
- // GSVector4 p = vertex[index[1]].p;
-
- mov(ecx, ptr[esp + _index]);
- mov(ecx, ptr[ecx + sizeof(uint32) * 1]);
- shl(ecx, 6); // * sizeof(GSVertexSW)
- add(ecx, ptr[esp + _vertex]);
-
- movaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]);
-
- if (m_en.f)
- {
- // m_local.p.f = GSVector4i(p).zzzzh().zzzz();
-
- cvttps2dq(xmm1, xmm0);
- pshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
- pshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
- movdqa(ptr[&m_local.p.f], xmm1);
- }
-
- if (m_en.z)
- {
- // uint32 z is bypassed in t.w
-
- movdqa(xmm0, ptr[ecx + offsetof(GSVertexSW, t)]);
- pshufd(xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
- movdqa(ptr[&m_local.p.z], xmm0);
- }
- }
-}
-
-void GSSetupPrimCodeGenerator::Texture_SSE()
-{
- if (!m_en.t)
- {
- return;
- }
-
- // GSVector4 t = dscan.t;
-
- movaps(xmm0, ptr[edx + offsetof(GSVertexSW, t)]);
-
- movaps(xmm1, xmm0);
- mulps(xmm1, xmm3);
-
- if (m_sel.fst)
- {
- // m_local.d4.stq = GSVector4i(t * 4.0f);
-
- cvttps2dq(xmm1, xmm1);
-
- movdqa(ptr[&m_local.d4.stq], xmm1);
- }
- else
- {
- // m_local.d4.stq = t * 4.0f;
-
- movaps(ptr[&m_local.d4.stq], xmm1);
- }
-
- for (int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
- {
- // GSVector4 ds = t.xxxx();
- // GSVector4 dt = t.yyyy();
- // GSVector4 dq = t.zzzz();
-
- movaps(xmm1, xmm0);
- shufps(xmm1, xmm1, (uint8)_MM_SHUFFLE(j, j, j, j));
-
- for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
- {
- // GSVector4 v = ds/dt * m_shift[i];
-
- movaps(xmm2, xmm1);
- mulps(xmm2, Xmm(4 + i));
-
- if (m_sel.fst)
- {
- // m_local.d[i].s/t = GSVector4i(v);
-
- cvttps2dq(xmm2, xmm2);
-
- switch (j)
- {
- case 0: movdqa(ptr[&m_local.d[i].s], xmm2); break;
- case 1: movdqa(ptr[&m_local.d[i].t], xmm2); break;
- }
- }
- else
- {
- // m_local.d[i].s/t/q = v;
-
- switch (j)
- {
- case 0: movaps(ptr[&m_local.d[i].s], xmm2); break;
- case 1: movaps(ptr[&m_local.d[i].t], xmm2); break;
- case 2: movaps(ptr[&m_local.d[i].q], xmm2); break;
- }
- }
- }
- }
-}
-
-void GSSetupPrimCodeGenerator::Color_SSE()
-{
- if (!m_en.c)
- {
- return;
- }
-
- if (m_sel.iip)
- {
- // GSVector4 c = dscan.c;
-
- movaps(xmm0, ptr[edx + offsetof(GSVertexSW, c)]);
- movaps(xmm1, xmm0);
-
- // m_local.d4.c = GSVector4i(c * 4.0f).xzyw().ps32();
-
- movaps(xmm2, xmm0);
- mulps(xmm2, xmm3);
- cvttps2dq(xmm2, xmm2);
- pshufd(xmm2, xmm2, _MM_SHUFFLE(3, 1, 2, 0));
- packssdw(xmm2, xmm2);
- movdqa(ptr[&m_local.d4.c], xmm2);
-
- // xmm3 is not needed anymore
-
- // GSVector4 dr = c.xxxx();
- // GSVector4 db = c.zzzz();
-
- shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
- shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
-
- for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
- {
- // GSVector4i r = GSVector4i(dr * m_shift[i]).ps32();
-
- movaps(xmm2, xmm0);
- mulps(xmm2, Xmm(4 + i));
- cvttps2dq(xmm2, xmm2);
- packssdw(xmm2, xmm2);
-
- // GSVector4i b = GSVector4i(db * m_shift[i]).ps32();
-
- movaps(xmm3, xmm1);
- mulps(xmm3, Xmm(4 + i));
- cvttps2dq(xmm3, xmm3);
- packssdw(xmm3, xmm3);
-
- // m_local.d[i].rb = r.upl16(b);
-
- punpcklwd(xmm2, xmm3);
- movdqa(ptr[&m_local.d[i].rb], xmm2);
- }
-
- // GSVector4 c = dscan.c;
-
- movaps(xmm0, ptr[edx + offsetof(GSVertexSW, c)]); // not enough regs, have to reload it
- movaps(xmm1, xmm0);
-
- // GSVector4 dg = c.yyyy();
- // GSVector4 da = c.wwww();
-
- shufps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
- shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
-
- for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
- {
- // GSVector4i g = GSVector4i(dg * m_shift[i]).ps32();
-
- movaps(xmm2, xmm0);
- mulps(xmm2, Xmm(4 + i));
- cvttps2dq(xmm2, xmm2);
- packssdw(xmm2, xmm2);
-
- // GSVector4i a = GSVector4i(da * m_shift[i]).ps32();
-
- movaps(xmm3, xmm1);
- mulps(xmm3, Xmm(4 + i));
- cvttps2dq(xmm3, xmm3);
- packssdw(xmm3, xmm3);
-
- // m_local.d[i].ga = g.upl16(a);
-
- punpcklwd(xmm2, xmm3);
- movdqa(ptr[&m_local.d[i].ga], xmm2);
- }
- }
- else
- {
- // GSVector4i c = GSVector4i(vertex[index[last].c);
-
- int last = 0;
-
- switch (m_sel.prim)
- {
- case GS_POINT_CLASS: last = 0; break;
- case GS_LINE_CLASS: last = 1; break;
- case GS_TRIANGLE_CLASS: last = 2; break;
- case GS_SPRITE_CLASS: last = 1; break;
- }
-
- if (!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth()
- {
- mov(ecx, ptr[esp + _index]);
- mov(ecx, ptr[ecx + sizeof(uint32) * last]);
- shl(ecx, 6); // * sizeof(GSVertexSW)
- add(ecx, ptr[esp + _vertex]);
- }
-
- cvttps2dq(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]);
-
- // c = c.upl16(c.zwxy());
-
- pshufd(xmm1, xmm0, _MM_SHUFFLE(1, 0, 3, 2));
- punpcklwd(xmm0, xmm1);
-
- // if(!tme) c = c.srl16(7);
-
- if (m_sel.tfx == TFX_NONE)
- {
- psrlw(xmm0, 7);
- }
-
- // m_local.c.rb = c.xxxx();
- // m_local.c.ga = c.zzzz();
-
- pshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
- pshufd(xmm2, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
-
- movdqa(ptr[&m_local.c.rb], xmm1);
- movdqa(ptr[&m_local.c.ga], xmm2);
- }
-}
-
-#endif
diff --git a/pcsx2/pcsx2.vcxproj b/pcsx2/pcsx2.vcxproj
index 64131f397b..2c787b648a 100644
--- a/pcsx2/pcsx2.vcxproj
+++ b/pcsx2/pcsx2.vcxproj
@@ -492,12 +492,6 @@
-
-
-
-
-
-
diff --git a/pcsx2/pcsx2.vcxproj.filters b/pcsx2/pcsx2.vcxproj.filters
index 87288df283..f734cc6721 100644
--- a/pcsx2/pcsx2.vcxproj.filters
+++ b/pcsx2/pcsx2.vcxproj.filters
@@ -1544,27 +1544,6 @@
System\Ps2\GS\Renderers\Software
-
- System\Ps2\GS\Renderers\Software
-
-
- System\Ps2\GS\Renderers\Software
-
-
- System\Ps2\GS\Renderers\Software
-
-
- System\Ps2\GS\Renderers\Software
-
-
- System\Ps2\GS\Renderers\Software
-
-
- System\Ps2\GS\Renderers\Software
-
-
- System\Ps2\GS\Renderers\Software
-
System\Ps2\GS\Renderers\Software