From 805b647c736410303fdd5a2307501a439227833c Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Sat, 28 Aug 2021 00:02:24 -0500 Subject: [PATCH] GS: Remove old SetupPrim code generators --- pcsx2/CMakeLists.txt | 6 - .../Renderers/SW/GSSetupPrimCodeGenerator.h | 17 - .../SW/GSSetupPrimCodeGenerator.x64.avx.cpp | 365 ----------------- .../SW/GSSetupPrimCodeGenerator.x64.avx2.cpp | 368 ----------------- .../SW/GSSetupPrimCodeGenerator.x64.cpp | 374 ------------------ .../SW/GSSetupPrimCodeGenerator.x86.avx.cpp | 335 ---------------- .../SW/GSSetupPrimCodeGenerator.x86.avx2.cpp | 360 ----------------- .../SW/GSSetupPrimCodeGenerator.x86.cpp | 350 ---------------- pcsx2/pcsx2.vcxproj | 6 - pcsx2/pcsx2.vcxproj.filters | 21 - 10 files changed, 2202 deletions(-) delete mode 100644 pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx.cpp delete mode 100644 pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx2.cpp delete mode 100644 pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.cpp delete mode 100644 pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.avx.cpp delete mode 100644 pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.avx2.cpp delete mode 100644 pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.cpp diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt index 96f4b775d7..3e9cda2898 100644 --- a/pcsx2/CMakeLists.txt +++ b/pcsx2/CMakeLists.txt @@ -650,12 +650,6 @@ set(pcsx2GSSources GS/Renderers/SW/GSRendererSW.cpp GS/Renderers/SW/GSSetupPrimCodeGenerator.cpp GS/Renderers/SW/GSSetupPrimCodeGenerator.all.cpp - GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.cpp - GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx.cpp - GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx2.cpp - GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.cpp - GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.avx.cpp - GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.avx2.cpp GS/Renderers/SW/GSTextureCacheSW.cpp GS/Renderers/SW/GSTextureSW.cpp GS/Renderers/OpenGL/GLLoader.cpp diff --git a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.h b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.h index 2bda1b0777..121e6c26c9 100644 --- a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.h +++ b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.h @@ -32,23 +32,6 @@ class GSSetupPrimCodeGenerator : public GSCodeGenerator uint32 z : 1, f : 1, t : 1, c : 1; } m_en; -#if _M_SSE < 0x501 - void Generate_SSE(); - void Depth_SSE(); - void Texture_SSE(); - void Color_SSE(); - - void Generate_AVX(); - void Depth_AVX(); - void Texture_AVX(); - void Color_AVX(); -#else - void Generate_AVX2(); - void Depth_AVX2(); - void Texture_AVX2(); - void Color_AVX2(); -#endif - public: GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize); }; diff --git a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx.cpp b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx.cpp deleted file mode 100644 index d965b49bc8..0000000000 --- a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx.cpp +++ /dev/null @@ -1,365 +0,0 @@ -/* PCSX2 - PS2 Emulator for PCs - * Copyright (C) 2002-2021 PCSX2 Dev Team - * - * PCSX2 is free software: you can redistribute it and/or modify it under the terms - * of the GNU Lesser General Public License as published by the Free Software Found- - * ation, either version 3 of the License, or (at your option) any later version. - * - * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with PCSX2. - * If not, see . - */ - -#include "PrecompiledHeader.h" -#include "GSSetupPrimCodeGenerator.h" -#include "GSVertexSW.h" -#include "GS/GS_codegen.h" - -#if _M_SSE < 0x501 && (defined(_M_AMD64) || defined(_WIN64)) - -#define _rip_local(field) (m_rip ? ptr[rip + &m_local.field] : ptr[t0 + offsetof(GSScanlineLocalData, field)]) -#define _rip_local_v(field, offset) (m_rip ? ptr[rip + &m_local.field] : ptr[t0 + offset]) - -void GSSetupPrimCodeGenerator::Generate_AVX() -{ - // Technically we just need the delta < 2GB - m_rip = (size_t)&m_local < 0x80000000 && (size_t)getCurr() < 0x80000000; - -#ifdef _WIN64 - sub(rsp, 8 + 2 * 16); - - vmovdqa(ptr[rsp + 0], xmm6); - vmovdqa(ptr[rsp + 16], xmm7); -#endif - - if (!m_rip) - mov(t0, (size_t)&m_local); - - if ((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) - { - mov(rax, (size_t)g_const->m_shift_128b); - - for (int i = 0; i < (m_sel.notest ? 2 : 5); i++) - { - vmovaps(Xmm(3 + i), ptr[rax + i * 16]); - } - } - - Depth_AVX(); - - Texture_AVX(); - - Color_AVX(); - -#ifdef _WIN64 - vmovdqa(xmm6, ptr[rsp + 0]); - vmovdqa(xmm7, ptr[rsp + 16]); - - add(rsp, 8 + 2 * 16); -#endif - - ret(); -} - -void GSSetupPrimCodeGenerator::Depth_AVX() -{ - if (!m_en.z && !m_en.f) - { - return; - } - - if (m_sel.prim != GS_SPRITE_CLASS) - { - // GSVector4 p = dscan.p; - - vmovaps(xmm0, ptr[a2 + offsetof(GSVertexSW, p)]); - - if (m_en.f) - { - // GSVector4 df = p.wwww(); - - vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3)); - - // m_local.d4.f = GSVector4i(df * 4.0f).xxzzlh(); - - vmulps(xmm2, xmm1, xmm3); - vcvttps2dq(xmm2, xmm2); - vpshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - vmovdqa(_rip_local(d4.f), xmm2); - - for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) - { - // m_local.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh(); - - vmulps(xmm2, xmm1, Xmm(4 + i)); - vcvttps2dq(xmm2, xmm2); - vpshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - - const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].f) + (i * sizeof(GSScanlineLocalData::d[0])); - vmovdqa(_rip_local_v(d[i].f, variableOffset), xmm2); - } - } - - if (m_en.z) - { - // GSVector4 dz = p.zzzz(); - - vshufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - - // m_local.d4.z = dz * 4.0f; - - vmulps(xmm1, xmm0, xmm3); - vmovdqa(_rip_local(d4.z), xmm1); - - for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) - { - // m_local.d[i].z = dz * m_shift[i]; - - vmulps(xmm1, xmm0, Xmm(4 + i)); - - const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].z) + (i * sizeof(GSScanlineLocalData::d[0])); - vmovdqa(_rip_local_v(d[i].z, variableOffset), xmm1); - } - } - } - else - { - // GSVector4 p = vertex[index[1]].p; - - mov(eax, ptr[a1 + sizeof(uint32) * 1]); - shl(eax, 6); // * sizeof(GSVertexSW) - add(rax, a0); - - if (m_en.f) - { - // m_local.p.f = GSVector4i(p).zzzzh().zzzz(); - vmovaps(xmm0, ptr[rax + offsetof(GSVertexSW, p)]); - - vcvttps2dq(xmm1, xmm0); - vpshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - vpshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - vmovdqa(_rip_local(p.f), xmm1); - } - - if (m_en.z) - { - // uint32 z is bypassed in t.w - - vmovdqa(xmm0, ptr[rax + offsetof(GSVertexSW, t)]); - vpshufd(xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3)); - vmovdqa(_rip_local(p.z), xmm0); - } - } -} - -void GSSetupPrimCodeGenerator::Texture_AVX() -{ - if (!m_en.t) - { - return; - } - - // GSVector4 t = dscan.t; - - vmovaps(xmm0, ptr[a2 + offsetof(GSVertexSW, t)]); - - vmulps(xmm1, xmm0, xmm3); - - if (m_sel.fst) - { - // m_local.d4.stq = GSVector4i(t * 4.0f); - - vcvttps2dq(xmm1, xmm1); - - vmovdqa(_rip_local(d4.stq), xmm1); - } - else - { - // m_local.d4.stq = t * 4.0f; - - vmovaps(_rip_local(d4.stq), xmm1); - } - - for (int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++) - { - // GSVector4 ds = t.xxxx(); - // GSVector4 dt = t.yyyy(); - // GSVector4 dq = t.zzzz(); - - vshufps(xmm1, xmm0, xmm0, (uint8)_MM_SHUFFLE(j, j, j, j)); - - for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) - { - // GSVector4 v = ds/dt * m_shift[i]; - - vmulps(xmm2, xmm1, Xmm(4 + i)); - - if (m_sel.fst) - { - // m_local.d[i].s/t = GSVector4i(v); - - vcvttps2dq(xmm2, xmm2); - - const size_t variableOffsetS = offsetof(GSScanlineLocalData, d[0].s) + (i * sizeof(GSScanlineLocalData::d[0])); - const size_t variableOffsetT = offsetof(GSScanlineLocalData, d[0].t) + (i * sizeof(GSScanlineLocalData::d[0])); - - switch (j) - { - case 0: vmovdqa(_rip_local_v(d[i].s, variableOffsetS), xmm2); break; - case 1: vmovdqa(_rip_local_v(d[i].t, variableOffsetT), xmm2); break; - } - } - else - { - // m_local.d[i].s/t/q = v; - - const size_t variableOffsetS = offsetof(GSScanlineLocalData, d[0].s) + (i * sizeof(GSScanlineLocalData::d[0])); - const size_t variableOffsetT = offsetof(GSScanlineLocalData, d[0].t) + (i * sizeof(GSScanlineLocalData::d[0])); - const size_t variableOffsetQ = offsetof(GSScanlineLocalData, d[0].q) + (i * sizeof(GSScanlineLocalData::d[0])); - - switch (j) - { - case 0: vmovaps(_rip_local_v(d[i].s, variableOffsetS), xmm2); break; - case 1: vmovaps(_rip_local_v(d[i].t, variableOffsetT), xmm2); break; - case 2: vmovaps(_rip_local_v(d[i].q, variableOffsetQ), xmm2); break; - } - } - } - } -} - -void GSSetupPrimCodeGenerator::Color_AVX() -{ - if (!m_en.c) - { - return; - } - - if (m_sel.iip) - { - // GSVector4 c = dscan.c; - - vmovaps(xmm0, ptr[a2 + offsetof(GSVertexSW, c)]); - - // m_local.d4.c = GSVector4i(c * 4.0f).xzyw().ps32(); - - vmulps(xmm1, xmm0, xmm3); - vcvttps2dq(xmm1, xmm1); - vpshufd(xmm1, xmm1, _MM_SHUFFLE(3, 1, 2, 0)); - vpackssdw(xmm1, xmm1); - vmovdqa(_rip_local(d4.c), xmm1); - - // xmm3 is not needed anymore - - // GSVector4 dr = c.xxxx(); - // GSVector4 db = c.zzzz(); - - vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - - for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) - { - // GSVector4i r = GSVector4i(dr * m_shift[i]).ps32(); - - vmulps(xmm0, xmm2, Xmm(4 + i)); - vcvttps2dq(xmm0, xmm0); - vpackssdw(xmm0, xmm0); - - // GSVector4i b = GSVector4i(db * m_shift[i]).ps32(); - - vmulps(xmm1, xmm3, Xmm(4 + i)); - vcvttps2dq(xmm1, xmm1); - vpackssdw(xmm1, xmm1); - - // m_local.d[i].rb = r.upl16(b); - - vpunpcklwd(xmm0, xmm1); - - const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].rb) + (i * sizeof(GSScanlineLocalData::d[0])); - vmovdqa(_rip_local_v(d[i].rb, variableOffset), xmm0); - } - - // GSVector4 c = dscan.c; - - vmovaps(xmm0, ptr[a2 + offsetof(GSVertexSW, c)]); // not enough regs, have to reload it - - // GSVector4 dg = c.yyyy(); - // GSVector4 da = c.wwww(); - - vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); - vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3)); - - for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) - { - // GSVector4i g = GSVector4i(dg * m_shift[i]).ps32(); - - vmulps(xmm0, xmm2, Xmm(4 + i)); - vcvttps2dq(xmm0, xmm0); - vpackssdw(xmm0, xmm0); - - // GSVector4i a = GSVector4i(da * m_shift[i]).ps32(); - - vmulps(xmm1, xmm3, Xmm(4 + i)); - vcvttps2dq(xmm1, xmm1); - vpackssdw(xmm1, xmm1); - - // m_local.d[i].ga = g.upl16(a); - - vpunpcklwd(xmm0, xmm1); - - const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].ga) + (i * sizeof(GSScanlineLocalData::d[0])); - vmovdqa(_rip_local_v(d[i].ga, variableOffset), xmm0); - } - } - else - { - // GSVector4i c = GSVector4i(vertex[index[last].c); - - int last = 0; - - switch (m_sel.prim) - { - case GS_POINT_CLASS: last = 0; break; - case GS_LINE_CLASS: last = 1; break; - case GS_TRIANGLE_CLASS: last = 2; break; - case GS_SPRITE_CLASS: last = 1; break; - } - - if (!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth() - { - mov(eax, ptr[a1 + sizeof(uint32) * last]); - shl(eax, 6); // * sizeof(GSVertexSW) - add(rax, a0); - } - - vcvttps2dq(xmm0, ptr[rax + offsetof(GSVertexSW, c)]); - - // c = c.upl16(c.zwxy()); - - vpshufd(xmm1, xmm0, _MM_SHUFFLE(1, 0, 3, 2)); - vpunpcklwd(xmm0, xmm1); - - // if(!tme) c = c.srl16(7); - - if (m_sel.tfx == TFX_NONE) - { - vpsrlw(xmm0, 7); - } - - // m_local.c.rb = c.xxxx(); - // m_local.c.ga = c.zzzz(); - - vpshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - vpshufd(xmm2, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - - vmovdqa(_rip_local(c.rb), xmm1); - vmovdqa(_rip_local(c.ga), xmm2); - } -} - -#endif diff --git a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx2.cpp b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx2.cpp deleted file mode 100644 index 916f4e682b..0000000000 --- a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx2.cpp +++ /dev/null @@ -1,368 +0,0 @@ -/* PCSX2 - PS2 Emulator for PCs - * Copyright (C) 2002-2021 PCSX2 Dev Team - * - * PCSX2 is free software: you can redistribute it and/or modify it under the terms - * of the GNU Lesser General Public License as published by the Free Software Found- - * ation, either version 3 of the License, or (at your option) any later version. - * - * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with PCSX2. - * If not, see . - */ - -#include "PrecompiledHeader.h" -#include "GSSetupPrimCodeGenerator.h" -#include "GSVertexSW.h" -#include "GS/GS_codegen.h" - -#if _M_SSE >= 0x501 && (defined(_M_AMD64) || defined(_WIN64)) - -#define _rip_local(field) (m_rip ? ptr[rip + &m_local.field] : ptr[t0 + offsetof(GSScanlineLocalData, field)]) -#define _rip_local_v(field, offset) (m_rip ? ptr[rip + &m_local.field] : ptr[t0 + offset]) - -#define _m_shift(i) (Ymm(7 + i)) - -// FIXME windows ? -#define _vertex rcx - -void GSSetupPrimCodeGenerator::Generate_AVX2() -{ - // Technically we just need the delta < 2GB - m_rip = (size_t)&m_local < 0x80000000 && (size_t)getCurr() < 0x80000000; - -#ifdef _WIN64 - sub(rsp, 8 + 2 * 16); - - vmovdqa(ptr[rsp + 0], ymm6); - vmovdqa(ptr[rsp + 16], ymm7); -#endif - - if (!m_rip) - mov(t0, (size_t)&m_local); - - if ((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) - { - mov(rax, (size_t)g_const->m_shift_256b); - - for (int i = 0; i < (m_sel.notest ? 2 : 9); i++) - { - vmovaps(_m_shift(i), ptr[rax + i * 32]); - } - } - // ymm7 to ymm 15 = m_shift[i] - - Depth_AVX2(); - - Texture_AVX2(); - - Color_AVX2(); - -#ifdef _WIN64 - vmovdqa(ymm6, ptr[rsp + 0]); - vmovdqa(ymm7, ptr[rsp + 16]); - - add(rsp, 8 + 2 * 16); -#endif - - ret(); -} - -void GSSetupPrimCodeGenerator::Depth_AVX2() -{ - if (!m_en.z && !m_en.f) - { - return; - } - - if (m_sel.prim != GS_SPRITE_CLASS) - { - const Ymm& dscan_p = ymm6; - - // GSVector4 dp8 = dscan.p * GSVector4::broadcast32(&shift[0]); - - vbroadcastf128(dscan_p, ptr[a2 + offsetof(GSVertexSW, p)]); - - vmulps(ymm1, dscan_p, _m_shift(0)); - - if (m_en.z) - { - // m_local.d8.p.z = dp8.extract32<2>(); - - vextractps(_rip_local(d8.p.z), xmm1, 2); - - // GSVector8 dz = GSVector8(dscan.p).zzzz(); - - vshufps(ymm2, dscan_p, dscan_p, _MM_SHUFFLE(2, 2, 2, 2)); - - for (int i = 0; i < (m_sel.notest ? 1 : 8); i++) - { - // m_local.d[i].z = dz * shift[1 + i]; - - vmulps(ymm0, ymm2, _m_shift(1 + i)); - - const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].z) + (i * sizeof(GSScanlineLocalData::d[0])); - vmovaps(_rip_local_v(d[i].z, variableOffset), ymm0); - } - } - - if (m_en.f) - { - // m_local.d8.p.f = GSVector4i(dp8).extract32<3>(); - - // FIXME no truncate ? why ? vcvttps2dq ? - //vcvtps2dq(ymm2, ymm1); // let's guess a typo - vcvttps2dq(ymm2, ymm1); - vpextrd(_rip_local(d8.p.f), xmm2, 3); - - // GSVector8 df = GSVector8(dscan.p).wwww(); - - vshufps(ymm3, dscan_p, dscan_p, _MM_SHUFFLE(3, 3, 3, 3)); - - for (int i = 0; i < (m_sel.notest ? 1 : 8); i++) - { - // m_local.d[i].f = GSVector8i(df * m_shift[i]).xxzzlh(); - - vmulps(ymm0, ymm3, _m_shift(1 + i)); - vcvttps2dq(ymm0, ymm0); - - vpshuflw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0)); - - const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].f) + (i * sizeof(GSScanlineLocalData::d[0])); - vmovdqa(_rip_local_v(d[i].f, variableOffset), ymm0); - } - } - } - else - { - // GSVector4 p = vertex[index[1]].p; - - mov(_vertex.cvt32(), ptr[a1 + sizeof(uint32) * 1]); - shl(_vertex.cvt32(), 6); // * sizeof(GSVertexSW) - add(_vertex, a0); - - if (m_en.f) - { - // m_local.p.f = GSVector4i(vertex[index[1]].p).extract32<3>(); - - vmovaps(xmm0, ptr[_vertex + offsetof(GSVertexSW, p)]); - vcvttps2dq(xmm0, xmm0); - vpextrd(_rip_local(p.f), xmm0, 3); - } - - if (m_en.z) - { - // m_local.p.z = vertex[index[1]].t.u32[3]; // uint32 z is bypassed in t.w - - mov(eax, ptr[ecx + offsetof(GSVertexSW, t.w)]); - mov(_rip_local(p.z), eax); - } - } -} - -void GSSetupPrimCodeGenerator::Texture_AVX2() -{ - if (!m_en.t) - { - return; - } - - // GSVector8 dt(dscan.t); - - vbroadcastf128(ymm0, ptr[a2 + offsetof(GSVertexSW, t)]); - - // GSVector8 dt8 = dt * shift[0]; - - vmulps(ymm1, ymm0, _m_shift(0)); - - if (m_sel.fst) - { - // m_local.84.stq = GSVector4i(t * 4.0f); - - vcvttps2dq(ymm1, ymm1); - - vmovdqa(_rip_local(d8.stq), xmm1); - } - else - { - // m_local.d8.stq = t * 4.0f; - - vmovaps(_rip_local(d8.stq), xmm1); - } - - for (int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++) - { - // GSVector8 dstq = dt.xxxx/yyyy/zzzz(); - - vshufps(ymm1, ymm0, ymm0, (uint8)_MM_SHUFFLE(j, j, j, j)); - - for (int i = 0; i < (m_sel.notest ? 1 : 8); i++) - { - // GSVector8 v = dstq * shift[1 + i]; - - vmulps(ymm2, ymm1, _m_shift(1 + i)); - - if (m_sel.fst) - { - // m_local.d[i].s/t = GSVector8::cast(GSVector8i(v)); - - vcvttps2dq(ymm2, ymm2); - - const size_t variableOffsetS = offsetof(GSScanlineLocalData, d[0].s) + (i * sizeof(GSScanlineLocalData::d[0])); - const size_t variableOffsetT = offsetof(GSScanlineLocalData, d[0].t) + (i * sizeof(GSScanlineLocalData::d[0])); - - switch (j) - { - case 0: vmovdqa(_rip_local_v(d[i].s, variableOffsetS), ymm2); break; - case 1: vmovdqa(_rip_local_v(d[i].t, variableOffsetT), ymm2); break; - } - } - else - { - // m_local.d[i].s/t/q = v; - - const size_t variableOffsetS = offsetof(GSScanlineLocalData, d[0].s) + (i * sizeof(GSScanlineLocalData::d[0])); - const size_t variableOffsetT = offsetof(GSScanlineLocalData, d[0].t) + (i * sizeof(GSScanlineLocalData::d[0])); - const size_t variableOffsetQ = offsetof(GSScanlineLocalData, d[0].q) + (i * sizeof(GSScanlineLocalData::d[0])); - - switch (j) - { - case 0: vmovaps(_rip_local_v(d[i].s, variableOffsetS), ymm2); break; - case 1: vmovaps(_rip_local_v(d[i].t, variableOffsetT), ymm2); break; - case 2: vmovaps(_rip_local_v(d[i].q, variableOffsetQ), ymm2); break; - } - } - } - } -} - -void GSSetupPrimCodeGenerator::Color_AVX2() -{ - if (!m_en.c) - { - return; - } - - if (m_sel.iip) - { - const Ymm& dscan_c = ymm6; - - // GSVector8 dc(dscan.c); - - vbroadcastf128(dscan_c, ptr[a2 + offsetof(GSVertexSW, c)]); - - // m_local.d8.c = GSVector4i(c * 4.0f).xzyw().ps32(); - - vmulps(ymm1, dscan_c, ymm3); - vcvttps2dq(ymm1, ymm1); - vpshufd(ymm1, ymm1, _MM_SHUFFLE(3, 1, 2, 0)); - vpackssdw(ymm1, ymm1); - vmovq(_rip_local(d8.c), xmm1); - - // GSVector8 dr = dc.xxxx(); - // GSVector8 db = dc.zzzz(); - - vshufps(ymm2, dscan_c, dscan_c, _MM_SHUFFLE(0, 0, 0, 0)); - vshufps(ymm3, dscan_c, dscan_c, _MM_SHUFFLE(2, 2, 2, 2)); - - for (int i = 0; i < (m_sel.notest ? 1 : 8); i++) - { - // GSVector8i r = GSVector8i(dr * shift[1 + i]).ps32(); - - vmulps(ymm0, ymm2, _m_shift(1 + i)); - vcvttps2dq(ymm0, ymm0); - vpackssdw(ymm0, ymm0); - - // GSVector4i b = GSVector8i(db * shift[1 + i]).ps32(); - - vmulps(ymm1, ymm3, _m_shift(1 + i)); - vcvttps2dq(ymm1, ymm1); - vpackssdw(ymm1, ymm1); - - // m_local.d[i].rb = r.upl16(b); - - vpunpcklwd(ymm0, ymm1); - - const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].rb) + (i * sizeof(GSScanlineLocalData::d[0])); - vmovdqa(_rip_local_v(d[i].rb, variableOffset), ymm0); - } - - // GSVector8 dg = dc.yyyy(); - // GSVector8 da = dc.wwww(); - - vshufps(ymm2, dscan_c, dscan_c, _MM_SHUFFLE(1, 1, 1, 1)); - vshufps(ymm3, dscan_c, dscan_c, _MM_SHUFFLE(3, 3, 3, 3)); - - for (int i = 0; i < (m_sel.notest ? 1 : 8); i++) - { - // GSVector8i g = GSVector8i(dg * shift[1 + i]).ps32(); - - vmulps(ymm0, ymm2, _m_shift(1 + i)); - vcvttps2dq(ymm0, ymm0); - vpackssdw(ymm0, ymm0); - - // GSVector8i a = GSVector8i(da * shift[1 + i]).ps32(); - - vmulps(ymm1, ymm3, _m_shift(1 + i)); - vcvttps2dq(ymm1, ymm1); - vpackssdw(ymm1, ymm1); - - // m_local.d[i].ga = g.upl16(a); - - vpunpcklwd(ymm0, ymm1); - - const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].ga) + (i * sizeof(GSScanlineLocalData::d[0])); - vmovdqa(_rip_local_v(d[i].ga, variableOffset), ymm0); - } - } - else - { - // GSVector4i c = GSVector4i(vertex[index[last].c); - - int last = 0; - - switch (m_sel.prim) - { - case GS_POINT_CLASS: last = 0; break; - case GS_LINE_CLASS: last = 1; break; - case GS_TRIANGLE_CLASS: last = 2; break; - case GS_SPRITE_CLASS: last = 1; break; - } - - if (!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth() - { - mov(_vertex.cvt32(), ptr[a1 + sizeof(uint32) * last]); - shl(_vertex.cvt32(), 6); // * sizeof(GSVertexSW) - add(_vertex, a0); - } - - vbroadcasti128(ymm0, ptr[_vertex + offsetof(GSVertexSW, c)]); - vcvttps2dq(ymm0, ymm0); - - // c = c.upl16(c.zwxy()); - - vpshufd(ymm1, ymm0, _MM_SHUFFLE(1, 0, 3, 2)); - vpunpcklwd(ymm0, ymm1); - - // if(!tme) c = c.srl16(7); - - if (m_sel.tfx == TFX_NONE) - { - vpsrlw(ymm0, 7); - } - - // m_local.c.rb = c.xxxx(); - // m_local.c.ga = c.zzzz(); - - vpshufd(ymm1, ymm0, _MM_SHUFFLE(0, 0, 0, 0)); - vpshufd(ymm2, ymm0, _MM_SHUFFLE(2, 2, 2, 2)); - - vmovdqa(_rip_local(c.rb), ymm1); - vmovdqa(_rip_local(c.ga), ymm2); - } -} - -#endif diff --git a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.cpp b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.cpp deleted file mode 100644 index 3a12b46106..0000000000 --- a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.cpp +++ /dev/null @@ -1,374 +0,0 @@ -/* PCSX2 - PS2 Emulator for PCs - * Copyright (C) 2002-2021 PCSX2 Dev Team - * - * PCSX2 is free software: you can redistribute it and/or modify it under the terms - * of the GNU Lesser General Public License as published by the Free Software Found- - * ation, either version 3 of the License, or (at your option) any later version. - * - * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with PCSX2. - * If not, see . - */ - -#include "PrecompiledHeader.h" -#include "GSSetupPrimCodeGenerator.h" -#include "GSVertexSW.h" -#include "GS/GS_codegen.h" - -#if _M_SSE < 0x501 && (defined(_M_AMD64) || defined(_WIN64)) - -void GSSetupPrimCodeGenerator::Generate_SSE() -{ -#ifdef _WIN64 - sub(rsp, 8 + 2 * 16); - - vmovdqa(ptr[rsp + 0], xmm6); - vmovdqa(ptr[rsp + 16], xmm7); -#endif - - mov(t0, (size_t)&m_local); - - if ((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) - { - mov(rax, (size_t)g_const->m_shift_128b[0]); - - for (int i = 0; i < (m_sel.notest ? 2 : 5); i++) - { - movaps(Xmm(3 + i), ptr[rax + i * 16]); - } - } - - Depth_SSE(); - - Texture_SSE(); - - Color_SSE(); - -#ifdef _WIN64 - vmovdqa(xmm6, ptr[rsp + 0]); - vmovdqa(xmm7, ptr[rsp + 16]); - - add(rsp, 8 + 2 * 16); -#endif - - ret(); -} - -void GSSetupPrimCodeGenerator::Depth_SSE() -{ - if (!m_en.z && !m_en.f) - { - return; - } - - if (m_sel.prim != GS_SPRITE_CLASS) - { - // GSVector4 p = dscan.p; - - movaps(xmm0, ptr[a2 + offsetof(GSVertexSW, p)]); - - if (m_en.f) - { - // GSVector4 df = p.wwww(); - - movaps(xmm1, xmm0); - shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); - - // m_local.d4.f = GSVector4i(df * 4.0f).xxzzlh(); - - movaps(xmm2, xmm1); - mulps(xmm2, xmm3); - cvttps2dq(xmm2, xmm2); - pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - movdqa(ptr[t0 + offsetof(GSScanlineLocalData, d4.f)], xmm2); - - for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) - { - // m_local.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh(); - - movaps(xmm2, xmm1); - mulps(xmm2, Xmm(4 + i)); - cvttps2dq(xmm2, xmm2); - pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - - const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].f) + (i * sizeof(GSScanlineLocalData::d[0])); - movdqa(ptr[t0 + variableOffset], xmm2); - } - } - - if (m_en.z) - { - // GSVector4 dz = p.zzzz(); - - shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - - // m_local.d4.z = dz * 4.0f; - - movaps(xmm1, xmm0); - mulps(xmm1, xmm3); - movdqa(ptr[t0 + offsetof(GSScanlineLocalData, d4.z)], xmm1); - - for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) - { - // m_local.d[i].z = dz * m_shift[i]; - - movaps(xmm1, xmm0); - mulps(xmm1, Xmm(4 + i)); - - const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].z) + (i * sizeof(GSScanlineLocalData::d[0])); - movdqa(ptr[t0 + variableOffset], xmm1); - } - } - } - else - { - // GSVector4 p = vertex[index[1]].p; - - mov(eax, ptr[a1 + sizeof(uint32) * 1]); - shl(eax, 6); // * sizeof(GSVertexSW) - add(rax, a0); - - movaps(xmm0, ptr[rax + offsetof(GSVertexSW, p)]); - - if (m_en.f) - { - // m_local.p.f = GSVector4i(p).zzzzh().zzzz(); - - cvttps2dq(xmm1, xmm0); - pshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - pshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - movdqa(ptr[t0 + offsetof(GSScanlineLocalData, p.f)], xmm1); - } - - if (m_en.z) - { - // uint32 z is bypassed in t.w - - vmovdqa(xmm0, ptr[rax + offsetof(GSVertexSW, t)]); - vpshufd(xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3)); - vmovdqa(ptr[t0 + offsetof(GSScanlineLocalData, p.z)], xmm0); - } - } -} - -void GSSetupPrimCodeGenerator::Texture_SSE() -{ - if (!m_en.t) - { - return; - } - - // GSVector4 t = dscan.t; - - movaps(xmm0, ptr[a2 + offsetof(GSVertexSW, t)]); - - movaps(xmm1, xmm0); - mulps(xmm1, xmm3); - - if (m_sel.fst) - { - // m_local.d4.stq = GSVector4i(t * 4.0f); - - cvttps2dq(xmm1, xmm1); - - movdqa(ptr[t0 + offsetof(GSScanlineLocalData, d4.stq)], xmm1); - } - else - { - // m_local.d4.stq = t * 4.0f; - - movaps(ptr[t0 + offsetof(GSScanlineLocalData, d4.stq)], xmm1); - } - - for (int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++) - { - // GSVector4 ds = t.xxxx(); - // GSVector4 dt = t.yyyy(); - // GSVector4 dq = t.zzzz(); - - movaps(xmm1, xmm0); - shufps(xmm1, xmm1, (uint8)_MM_SHUFFLE(j, j, j, j)); - - for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) - { - // GSVector4 v = ds/dt * m_shift[i]; - - movaps(xmm2, xmm1); - mulps(xmm2, Xmm(4 + i)); - - if (m_sel.fst) - { - // m_local.d[i].s/t = GSVector4i(v); - - cvttps2dq(xmm2, xmm2); - - const size_t variableOffsetS = offsetof(GSScanlineLocalData, d[0].s) + (i * sizeof(GSScanlineLocalData::d[0])); - const size_t variableOffsetT = offsetof(GSScanlineLocalData, d[0].t) + (i * sizeof(GSScanlineLocalData::d[0])); - - switch (j) - { - case 0: movdqa(ptr[t0 + variableOffsetS], xmm2); break; - case 1: movdqa(ptr[t0 + variableOffsetT], xmm2); break; - } - } - else - { - // m_local.d[i].s/t/q = v; - - const size_t variableOffsetS = offsetof(GSScanlineLocalData, d[0].s) + (i * sizeof(GSScanlineLocalData::d[0])); - const size_t variableOffsetT = offsetof(GSScanlineLocalData, d[0].t) + (i * sizeof(GSScanlineLocalData::d[0])); - const size_t variableOffsetQ = offsetof(GSScanlineLocalData, d[0].q) + (i * sizeof(GSScanlineLocalData::d[0])); - - switch (j) - { - case 0: movaps(ptr[t0 + variableOffsetS], xmm2); break; - case 1: movaps(ptr[t0 + variableOffsetT], xmm2); break; - case 2: movaps(ptr[t0 + variableOffsetQ], xmm2); break; - } - } - } - } -} - -void GSSetupPrimCodeGenerator::Color_SSE() -{ - if (!m_en.c) - { - return; - } - - if (m_sel.iip) - { - // GSVector4 c = dscan.c; - - movaps(xmm0, ptr[a2 + offsetof(GSVertexSW, c)]); - movaps(xmm1, xmm0); - - // m_local.d4.c = GSVector4i(c * 4.0f).xzyw().ps32(); - - movaps(xmm2, xmm0); - mulps(xmm2, xmm3); - cvttps2dq(xmm2, xmm2); - pshufd(xmm2, xmm2, _MM_SHUFFLE(3, 1, 2, 0)); - packssdw(xmm2, xmm2); - movdqa(ptr[t0 + offsetof(GSScanlineLocalData, d4.c)], xmm2); - - // xmm3 is not needed anymore - - // GSVector4 dr = c.xxxx(); - // GSVector4 db = c.zzzz(); - - shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - - for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) - { - // GSVector4i r = GSVector4i(dr * m_shift[i]).ps32(); - - movaps(xmm2, xmm0); - mulps(xmm2, Xmm(4 + i)); - cvttps2dq(xmm2, xmm2); - packssdw(xmm2, xmm2); - - // GSVector4i b = GSVector4i(db * m_shift[i]).ps32(); - - movaps(xmm3, xmm1); - mulps(xmm3, Xmm(4 + i)); - cvttps2dq(xmm3, xmm3); - packssdw(xmm3, xmm3); - - // m_local.d[i].rb = r.upl16(b); - - punpcklwd(xmm2, xmm3); - - const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].rb) + (i * sizeof(GSScanlineLocalData::d[0])); - movdqa(ptr[t0 + variableOffset], xmm2); - } - - // GSVector4 c = dscan.c; - - movaps(xmm0, ptr[a2 + offsetof(GSVertexSW, c)]); // not enough regs, have to reload it - movaps(xmm1, xmm0); - - // GSVector4 dg = c.yyyy(); - // GSVector4 da = c.wwww(); - - shufps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); - shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); - - for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) - { - // GSVector4i g = GSVector4i(dg * m_shift[i]).ps32(); - - movaps(xmm2, xmm0); - mulps(xmm2, Xmm(4 + i)); - cvttps2dq(xmm2, xmm2); - packssdw(xmm2, xmm2); - - // GSVector4i a = GSVector4i(da * m_shift[i]).ps32(); - - movaps(xmm3, xmm1); - mulps(xmm3, Xmm(4 + i)); - cvttps2dq(xmm3, xmm3); - packssdw(xmm3, xmm3); - - // m_local.d[i].ga = g.upl16(a); - - punpcklwd(xmm2, xmm3); - - const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].ga) + (i * sizeof(GSScanlineLocalData::d[0])); - movdqa(ptr[t0 + variableOffset], xmm2); - } - } - else - { - // GSVector4i c = GSVector4i(vertex[index[last].c); - - int last = 0; - - switch (m_sel.prim) - { - case GS_POINT_CLASS: last = 0; break; - case GS_LINE_CLASS: last = 1; break; - case GS_TRIANGLE_CLASS: last = 2; break; - case GS_SPRITE_CLASS: last = 1; break; - } - - if (!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth() - { - mov(eax, ptr[a1 + sizeof(uint32) * last]); - shl(eax, 6); // * sizeof(GSVertexSW) - add(rax, a0); - } - - cvttps2dq(xmm0, ptr[rax + offsetof(GSVertexSW, c)]); - - // c = c.upl16(c.zwxy()); - - pshufd(xmm1, xmm0, _MM_SHUFFLE(1, 0, 3, 2)); - punpcklwd(xmm0, xmm1); - - // if(!tme) c = c.srl16(7); - - if (m_sel.tfx == TFX_NONE) - { - psrlw(xmm0, 7); - } - - // m_local.c.rb = c.xxxx(); - // m_local.c.ga = c.zzzz(); - - pshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - pshufd(xmm2, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - - movdqa(ptr[t0 + offsetof(GSScanlineLocalData, c.rb)], xmm1); - movdqa(ptr[t0 + offsetof(GSScanlineLocalData, c.ga)], xmm2); - } -} - -#endif diff --git a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.avx.cpp b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.avx.cpp deleted file mode 100644 index 555abda7a7..0000000000 --- a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.avx.cpp +++ /dev/null @@ -1,335 +0,0 @@ -/* PCSX2 - PS2 Emulator for PCs - * Copyright (C) 2002-2021 PCSX2 Dev Team - * - * PCSX2 is free software: you can redistribute it and/or modify it under the terms - * of the GNU Lesser General Public License as published by the Free Software Found- - * ation, either version 3 of the License, or (at your option) any later version. - * - * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with PCSX2. - * If not, see . - */ - -#include "PrecompiledHeader.h" -#include "GSSetupPrimCodeGenerator.h" -#include "GSVertexSW.h" -#include "GS/GS_codegen.h" - -#if _M_SSE < 0x501 && !(defined(_M_AMD64) || defined(_WIN64)) - -static const int _args = 0; -static const int _vertex = _args + 4; -static const int _index = _args + 8; -static const int _dscan = _args + 12; - -void GSSetupPrimCodeGenerator::Generate_AVX() -{ - if ((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) - { - mov(edx, dword[esp + _dscan]); - - for (int i = 0; i < (m_sel.notest ? 2 : 5); i++) - { - vmovaps(Xmm(3 + i), ptr[g_const->m_shift_128b[i]]); - } - } - - Depth_AVX(); - - Texture_AVX(); - - Color_AVX(); - - ret(); -} - -void GSSetupPrimCodeGenerator::Depth_AVX() -{ - if (!m_en.z && !m_en.f) - { - return; - } - - if (m_sel.prim != GS_SPRITE_CLASS) - { - // GSVector4 p = dscan.p; - - vmovaps(xmm0, ptr[edx + offsetof(GSVertexSW, p)]); - - if (m_en.f) - { - // GSVector4 df = p.wwww(); - - vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3)); - - // m_local.d4.f = GSVector4i(df * 4.0f).xxzzlh(); - - vmulps(xmm2, xmm1, xmm3); - vcvttps2dq(xmm2, xmm2); - vpshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - vmovdqa(ptr[&m_local.d4.f], xmm2); - - for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) - { - // m_local.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh(); - - vmulps(xmm2, xmm1, Xmm(4 + i)); - vcvttps2dq(xmm2, xmm2); - vpshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - vmovdqa(ptr[&m_local.d[i].f], xmm2); - } - } - - if (m_en.z) - { - // GSVector4 dz = p.zzzz(); - - vshufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - - // m_local.d4.z = dz * 4.0f; - - vmulps(xmm1, xmm0, xmm3); - vmovdqa(ptr[&m_local.d4.z], xmm1); - - for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) - { - // m_local.d[i].z = dz * m_shift[i]; - - vmulps(xmm1, xmm0, Xmm(4 + i)); - vmovdqa(ptr[&m_local.d[i].z], xmm1); - } - } - } - else - { - // GSVector4 p = vertex[index[1]].p; - - mov(ecx, ptr[esp + _index]); - mov(ecx, ptr[ecx + sizeof(uint32) * 1]); - shl(ecx, 6); // * sizeof(GSVertexSW) - add(ecx, ptr[esp + _vertex]); - - vmovaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]); - - if (m_en.f) - { - // m_local.p.f = GSVector4i(p).zzzzh().zzzz(); - - vcvttps2dq(xmm1, xmm0); - vpshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - vpshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - vmovdqa(ptr[&m_local.p.f], xmm1); - } - - if (m_en.z) - { - // uint32 z is bypassed in t.w - - vmovdqa(xmm0, ptr[ecx + offsetof(GSVertexSW, t)]); - vpshufd(xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3)); - vmovdqa(ptr[&m_local.p.z], xmm0); - } - } -} - -void GSSetupPrimCodeGenerator::Texture_AVX() -{ - if (!m_en.t) - { - return; - } - - // GSVector4 t = dscan.t; - - vmovaps(xmm0, ptr[edx + offsetof(GSVertexSW, t)]); - - vmulps(xmm1, xmm0, xmm3); - - if (m_sel.fst) - { - // m_local.d4.stq = GSVector4i(t * 4.0f); - - vcvttps2dq(xmm1, xmm1); - - vmovdqa(ptr[&m_local.d4.stq], xmm1); - } - else - { - // m_local.d4.stq = t * 4.0f; - - vmovaps(ptr[&m_local.d4.stq], xmm1); - } - - for (int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++) - { - // GSVector4 ds = t.xxxx(); - // GSVector4 dt = t.yyyy(); - // GSVector4 dq = t.zzzz(); - - vshufps(xmm1, xmm0, xmm0, (uint8)_MM_SHUFFLE(j, j, j, j)); - - for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) - { - // GSVector4 v = ds/dt * m_shift[i]; - - vmulps(xmm2, xmm1, Xmm(4 + i)); - - if (m_sel.fst) - { - // m_local.d[i].s/t = GSVector4i(v); - - vcvttps2dq(xmm2, xmm2); - - switch (j) - { - case 0: vmovdqa(ptr[&m_local.d[i].s], xmm2); break; - case 1: vmovdqa(ptr[&m_local.d[i].t], xmm2); break; - } - } - else - { - // m_local.d[i].s/t/q = v; - - switch (j) - { - case 0: vmovaps(ptr[&m_local.d[i].s], xmm2); break; - case 1: vmovaps(ptr[&m_local.d[i].t], xmm2); break; - case 2: vmovaps(ptr[&m_local.d[i].q], xmm2); break; - } - } - } - } -} - -void GSSetupPrimCodeGenerator::Color_AVX() -{ - if (!m_en.c) - { - return; - } - - if (m_sel.iip) - { - // GSVector4 c = dscan.c; - - vmovaps(xmm0, ptr[edx + offsetof(GSVertexSW, c)]); - - // m_local.d4.c = GSVector4i(c * 4.0f).xzyw().ps32(); - - vmulps(xmm1, xmm0, xmm3); - vcvttps2dq(xmm1, xmm1); - vpshufd(xmm1, xmm1, _MM_SHUFFLE(3, 1, 2, 0)); - vpackssdw(xmm1, xmm1); - vmovdqa(ptr[&m_local.d4.c], xmm1); - - // xmm3 is not needed anymore - - // GSVector4 dr = c.xxxx(); - // GSVector4 db = c.zzzz(); - - vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - - for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) - { - // GSVector4i r = GSVector4i(dr * m_shift[i]).ps32(); - - vmulps(xmm0, xmm2, Xmm(4 + i)); - vcvttps2dq(xmm0, xmm0); - vpackssdw(xmm0, xmm0); - - // GSVector4i b = GSVector4i(db * m_shift[i]).ps32(); - - vmulps(xmm1, xmm3, Xmm(4 + i)); - vcvttps2dq(xmm1, xmm1); - vpackssdw(xmm1, xmm1); - - // m_local.d[i].rb = r.upl16(b); - - vpunpcklwd(xmm0, xmm1); - vmovdqa(ptr[&m_local.d[i].rb], xmm0); - } - - // GSVector4 c = dscan.c; - - vmovaps(xmm0, ptr[edx + offsetof(GSVertexSW, c)]); // not enough regs, have to reload it - - // GSVector4 dg = c.yyyy(); - // GSVector4 da = c.wwww(); - - vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); - vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3)); - - for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) - { - // GSVector4i g = GSVector4i(dg * m_shift[i]).ps32(); - - vmulps(xmm0, xmm2, Xmm(4 + i)); - vcvttps2dq(xmm0, xmm0); - vpackssdw(xmm0, xmm0); - - // GSVector4i a = GSVector4i(da * m_shift[i]).ps32(); - - vmulps(xmm1, xmm3, Xmm(4 + i)); - vcvttps2dq(xmm1, xmm1); - vpackssdw(xmm1, xmm1); - - // m_local.d[i].ga = g.upl16(a); - - vpunpcklwd(xmm0, xmm1); - vmovdqa(ptr[&m_local.d[i].ga], xmm0); - } - } - else - { - // GSVector4i c = GSVector4i(vertex[index[last].c); - - int last = 0; - - switch (m_sel.prim) - { - case GS_POINT_CLASS: last = 0; break; - case GS_LINE_CLASS: last = 1; break; - case GS_TRIANGLE_CLASS: last = 2; break; - case GS_SPRITE_CLASS: last = 1; break; - } - - if (!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth() - { - mov(ecx, ptr[esp + _index]); - mov(ecx, ptr[ecx + sizeof(uint32) * last]); - shl(ecx, 6); // * sizeof(GSVertexSW) - add(ecx, ptr[esp + _vertex]); - } - - vcvttps2dq(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]); - - // c = c.upl16(c.zwxy()); - - vpshufd(xmm1, xmm0, _MM_SHUFFLE(1, 0, 3, 2)); - vpunpcklwd(xmm0, xmm1); - - // if(!tme) c = c.srl16(7); - - if (m_sel.tfx == TFX_NONE) - { - vpsrlw(xmm0, 7); - } - - // m_local.c.rb = c.xxxx(); - // m_local.c.ga = c.zzzz(); - - vpshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - vpshufd(xmm2, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - - vmovdqa(ptr[&m_local.c.rb], xmm1); - vmovdqa(ptr[&m_local.c.ga], xmm2); - } -} - -#endif diff --git a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.avx2.cpp b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.avx2.cpp deleted file mode 100644 index a3f9ee653d..0000000000 --- a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.avx2.cpp +++ /dev/null @@ -1,360 +0,0 @@ -/* PCSX2 - PS2 Emulator for PCs - * Copyright (C) 2002-2021 PCSX2 Dev Team - * - * PCSX2 is free software: you can redistribute it and/or modify it under the terms - * of the GNU Lesser General Public License as published by the Free Software Found- - * ation, either version 3 of the License, or (at your option) any later version. - * - * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with PCSX2. - * If not, see . - */ - -#include "PrecompiledHeader.h" -#include "GSSetupPrimCodeGenerator.h" -#include "GSVertexSW.h" -#include "GS/GS_codegen.h" - -#if _M_SSE >= 0x501 && !(defined(_M_AMD64) || defined(_WIN64)) - -static const int _args = 0; -static const int _vertex = _args + 4; -static const int _index = _args + 8; -static const int _dscan = _args + 12; - -void GSSetupPrimCodeGenerator::Generate_AVX2() -{ - if ((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) - { - mov(edx, dword[esp + _dscan]); - - for (int i = 0; i < (m_sel.notest ? 2 : 5); i++) - { - vmovaps(Ymm(3 + i), ptr[g_const->m_shift_256b[i]]); - } - } - - Depth_AVX2(); - - Texture_AVX2(); - - Color_AVX2(); - - ret(); -} - -void GSSetupPrimCodeGenerator::Depth_AVX2() -{ - if (!m_en.z && !m_en.f) - { - return; - } - - if (m_sel.prim != GS_SPRITE_CLASS) - { - // GSVector4 dp8 = dscan.p * GSVector4::broadcast32(&shift[0]); - - vbroadcastf128(ymm0, ptr[edx + offsetof(GSVertexSW, p)]); - - vmulps(ymm1, ymm0, ymm3); - - if (m_en.z) - { - // m_local.d8.p.z = dp8.extract32<2>(); - - vextractps(ptr[&m_local.d8.p.z], xmm1, 2); - } - - if (m_en.f) - { - // m_local.d8.p.f = GSVector4i(dp8).extract32<3>(); - - vcvtps2dq(ymm2, ymm1); - vpextrd(ptr[&m_local.d8.p.f], xmm2, 3); - } - - if (m_en.z) - { - // GSVector8 dz = GSVector8(dscan.p).zzzz(); - - vshufps(ymm2, ymm0, ymm0, _MM_SHUFFLE(2, 2, 2, 2)); - } - - if (m_en.f) - { - // GSVector8 df = GSVector8(dscan.p).wwww(); - - vshufps(ymm1, ymm0, ymm0, _MM_SHUFFLE(3, 3, 3, 3)); - } - - for (int i = 0; i < (m_sel.notest ? 1 : 8); i++) - { - if (m_en.z) - { - // m_local.d[i].z = dz * shift[1 + i]; - - if (i < 4) - vmulps(ymm0, ymm2, Ymm(4 + i)); - else - vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]); - vmovaps(ptr[&m_local.d[i].z], ymm0); - } - - if (m_en.f) - { - // m_local.d[i].f = GSVector8i(df * m_shift[i]).xxzzlh(); - - if (i < 4) - vmulps(ymm0, ymm1, Ymm(4 + i)); - else - vmulps(ymm0, ymm1, ptr[g_const->m_shift_256b[i + 1]]); - vcvttps2dq(ymm0, ymm0); - vpshuflw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0)); - vmovdqa(ptr[&m_local.d[i].f], ymm0); - } - } - } - else - { - // GSVector4 p = vertex[index[1]].p; - - mov(ecx, ptr[esp + _index]); - mov(ecx, ptr[ecx + sizeof(uint32) * 1]); - shl(ecx, 6); // * sizeof(GSVertexSW) - add(ecx, ptr[esp + _vertex]); - - if (m_en.f) - { - // m_local.p.f = GSVector4i(vertex[index[1]].p).extract32<3>(); - - vmovaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]); - vcvttps2dq(xmm0, xmm0); - vpextrd(ptr[&m_local.p.f], xmm0, 3); - } - - if (m_en.z) - { - // m_local.p.z = vertex[index[1]].t.u32[3]; // uint32 z is bypassed in t.w - - mov(eax, ptr[ecx + offsetof(GSVertexSW, t.w)]); - mov(ptr[&m_local.p.z], eax); - } - } -} - -void GSSetupPrimCodeGenerator::Texture_AVX2() -{ - if (!m_en.t) - { - return; - } - - // GSVector8 dt(dscan.t); - - vbroadcastf128(ymm0, ptr[edx + offsetof(GSVertexSW, t)]); - - // GSVector8 dt8 = dt * shift[0]; - - vmulps(ymm1, ymm0, ymm3); - - if (m_sel.fst) - { - // m_local.d8.stq = GSVector8::cast(GSVector8i(dt8)); - - vcvttps2dq(ymm1, ymm1); - - vmovdqa(ptr[&m_local.d8.stq], xmm1); - } - else - { - // m_local.d8.stq = dt8; - - vmovaps(ptr[&m_local.d8.stq], xmm1); - } - - for (int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++) - { - // GSVector8 dstq = dt.xxxx/yyyy/zzzz(); - - vshufps(ymm1, ymm0, ymm0, (uint8)_MM_SHUFFLE(j, j, j, j)); - - for (int i = 0; i < (m_sel.notest ? 1 : 8); i++) - { - // GSVector8 v = dstq * shift[1 + i]; - - if (i < 4) - vmulps(ymm2, ymm1, Ymm(4 + i)); - else - vmulps(ymm2, ymm1, ptr[g_const->m_shift_256b[i + 1]]); - - if (m_sel.fst) - { - // m_local.d[i].s/t = GSVector8::cast(GSVector8i(v)); - - vcvttps2dq(ymm2, ymm2); - - switch (j) - { - case 0: vmovdqa(ptr[&m_local.d[i].s], ymm2); break; - case 1: vmovdqa(ptr[&m_local.d[i].t], ymm2); break; - } - } - else - { - // m_local.d[i].s/t/q = v; - - switch (j) - { - case 0: vmovaps(ptr[&m_local.d[i].s], ymm2); break; - case 1: vmovaps(ptr[&m_local.d[i].t], ymm2); break; - case 2: vmovaps(ptr[&m_local.d[i].q], ymm2); break; - } - } - } - } -} - -void GSSetupPrimCodeGenerator::Color_AVX2() -{ - if (!m_en.c) - { - return; - } - - if (m_sel.iip) - { - // GSVector8 dc(dscan.c); - - vbroadcastf128(ymm0, ptr[edx + offsetof(GSVertexSW, c)]); - - // m_local.d8.c = GSVector8i(dc * shift[0]).xzyw().ps32(); - - vmulps(ymm1, ymm0, ymm3); - vcvttps2dq(ymm1, ymm1); - vpshufd(ymm1, ymm1, _MM_SHUFFLE(3, 1, 2, 0)); - vpackssdw(ymm1, ymm1); - vmovq(ptr[&m_local.d8.c], xmm1); - - // ymm3 is not needed anymore - - // GSVector8 dr = dc.xxxx(); - // GSVector8 db = dc.zzzz(); - - vshufps(ymm2, ymm0, ymm0, _MM_SHUFFLE(0, 0, 0, 0)); - vshufps(ymm3, ymm0, ymm0, _MM_SHUFFLE(2, 2, 2, 2)); - - for (int i = 0; i < (m_sel.notest ? 1 : 8); i++) - { - // GSVector8i r = GSVector8i(dr * shift[1 + i]).ps32(); - - if (i < 4) - vmulps(ymm0, ymm2, Ymm(4 + i)); - else - vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]); - vcvttps2dq(ymm0, ymm0); - vpackssdw(ymm0, ymm0); - - // GSVector4i b = GSVector8i(db * shift[1 + i]).ps32(); - - if (i < 4) - vmulps(ymm1, ymm3, Ymm(4 + i)); - else - vmulps(ymm1, ymm3, ptr[g_const->m_shift_256b[i + 1]]); - vcvttps2dq(ymm1, ymm1); - vpackssdw(ymm1, ymm1); - - // m_local.d[i].rb = r.upl16(b); - - vpunpcklwd(ymm0, ymm1); - vmovdqa(ptr[&m_local.d[i].rb], ymm0); - } - - // GSVector8 dc(dscan.c); - - vbroadcastf128(ymm0, ptr[edx + offsetof(GSVertexSW, c)]); // not enough regs, have to reload it - - // GSVector8 dg = dc.yyyy(); - // GSVector8 da = dc.wwww(); - - vshufps(ymm2, ymm0, ymm0, _MM_SHUFFLE(1, 1, 1, 1)); - vshufps(ymm3, ymm0, ymm0, _MM_SHUFFLE(3, 3, 3, 3)); - - for (int i = 0; i < (m_sel.notest ? 1 : 8); i++) - { - // GSVector8i g = GSVector8i(dg * shift[1 + i]).ps32(); - - if (i < 4) - vmulps(ymm0, ymm2, Ymm(4 + i)); - else - vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]); - vcvttps2dq(ymm0, ymm0); - vpackssdw(ymm0, ymm0); - - // GSVector8i a = GSVector8i(da * shift[1 + i]).ps32(); - - if (i < 4) - vmulps(ymm1, ymm3, Ymm(4 + i)); - else - vmulps(ymm1, ymm3, ptr[g_const->m_shift_256b[i + 1]]); - vcvttps2dq(ymm1, ymm1); - vpackssdw(ymm1, ymm1); - - // m_local.d[i].ga = g.upl16(a); - - vpunpcklwd(ymm0, ymm1); - vmovdqa(ptr[&m_local.d[i].ga], ymm0); - } - } - else - { - // GSVector8i c = GSVector8i(GSVector8(vertex[index[last]].c)); - - int last = 0; - - switch (m_sel.prim) - { - case GS_POINT_CLASS: last = 0; break; - case GS_LINE_CLASS: last = 1; break; - case GS_TRIANGLE_CLASS: last = 2; break; - case GS_SPRITE_CLASS: last = 1; break; - } - - if (!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth() - { - mov(ecx, ptr[esp + _index]); - mov(ecx, ptr[ecx + sizeof(uint32) * last]); - shl(ecx, 6); // * sizeof(GSVertexSW) - add(ecx, ptr[esp + _vertex]); - } - - vbroadcasti128(ymm0, ptr[ecx + offsetof(GSVertexSW, c)]); - vcvttps2dq(ymm0, ymm0); - - // c = c.upl16(c.zwxy()); - - vpshufd(ymm1, ymm0, _MM_SHUFFLE(1, 0, 3, 2)); - vpunpcklwd(ymm0, ymm1); - - // if(!tme) c = c.srl16(7); - - if (m_sel.tfx == TFX_NONE) - { - vpsrlw(ymm0, 7); - } - - // m_local.c.rb = c.xxxx(); - // m_local.c.ga = c.zzzz(); - - vpshufd(ymm1, ymm0, _MM_SHUFFLE(0, 0, 0, 0)); - vpshufd(ymm2, ymm0, _MM_SHUFFLE(2, 2, 2, 2)); - - vmovdqa(ptr[&m_local.c.rb], ymm1); - vmovdqa(ptr[&m_local.c.ga], ymm2); - } -} - -#endif diff --git a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.cpp b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.cpp deleted file mode 100644 index 0b70d8ffc7..0000000000 --- a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.x86.cpp +++ /dev/null @@ -1,350 +0,0 @@ -/* PCSX2 - PS2 Emulator for PCs - * Copyright (C) 2002-2021 PCSX2 Dev Team - * - * PCSX2 is free software: you can redistribute it and/or modify it under the terms - * of the GNU Lesser General Public License as published by the Free Software Found- - * ation, either version 3 of the License, or (at your option) any later version. - * - * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with PCSX2. - * If not, see . - */ - -#include "PrecompiledHeader.h" -#include "GSSetupPrimCodeGenerator.h" -#include "GSVertexSW.h" -#include "GS/GS_codegen.h" - -#if _M_SSE < 0x501 && !(defined(_M_AMD64) || defined(_WIN64)) - -static const int _args = 0; -static const int _vertex = _args + 4; -static const int _index = _args + 8; -static const int _dscan = _args + 12; - -void GSSetupPrimCodeGenerator::Generate_SSE() -{ - if ((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) - { - mov(edx, dword[esp + _dscan]); - - for (int i = 0; i < (m_sel.notest ? 2 : 5); i++) - { - movaps(Xmm(3 + i), ptr[g_const->m_shift_128b[i]]); - } - } - - Depth_SSE(); - - Texture_SSE(); - - Color_SSE(); - - ret(); -} - -void GSSetupPrimCodeGenerator::Depth_SSE() -{ - if (!m_en.z && !m_en.f) - { - return; - } - - if (m_sel.prim != GS_SPRITE_CLASS) - { - // GSVector4 p = dscan.p; - - movaps(xmm0, ptr[edx + offsetof(GSVertexSW, p)]); - - if (m_en.f) - { - // GSVector4 df = p.wwww(); - - movaps(xmm1, xmm0); - shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); - - // m_local.d4.f = GSVector4i(df * 4.0f).xxzzlh(); - - movaps(xmm2, xmm1); - mulps(xmm2, xmm3); - cvttps2dq(xmm2, xmm2); - pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - movdqa(ptr[&m_local.d4.f], xmm2); - - for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) - { - // m_local.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh(); - - movaps(xmm2, xmm1); - mulps(xmm2, Xmm(4 + i)); - cvttps2dq(xmm2, xmm2); - pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - movdqa(ptr[&m_local.d[i].f], xmm2); - } - } - - if (m_en.z) - { - // GSVector4 dz = p.zzzz(); - - shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - - // m_local.d4.z = dz * 4.0f; - - movaps(xmm1, xmm0); - mulps(xmm1, xmm3); - movdqa(ptr[&m_local.d4.z], xmm1); - - for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) - { - // m_local.d[i].z = dz * m_shift[i]; - - movaps(xmm1, xmm0); - mulps(xmm1, Xmm(4 + i)); - movdqa(ptr[&m_local.d[i].z], xmm1); - } - } - } - else - { - // GSVector4 p = vertex[index[1]].p; - - mov(ecx, ptr[esp + _index]); - mov(ecx, ptr[ecx + sizeof(uint32) * 1]); - shl(ecx, 6); // * sizeof(GSVertexSW) - add(ecx, ptr[esp + _vertex]); - - movaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]); - - if (m_en.f) - { - // m_local.p.f = GSVector4i(p).zzzzh().zzzz(); - - cvttps2dq(xmm1, xmm0); - pshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - pshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - movdqa(ptr[&m_local.p.f], xmm1); - } - - if (m_en.z) - { - // uint32 z is bypassed in t.w - - movdqa(xmm0, ptr[ecx + offsetof(GSVertexSW, t)]); - pshufd(xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3)); - movdqa(ptr[&m_local.p.z], xmm0); - } - } -} - -void GSSetupPrimCodeGenerator::Texture_SSE() -{ - if (!m_en.t) - { - return; - } - - // GSVector4 t = dscan.t; - - movaps(xmm0, ptr[edx + offsetof(GSVertexSW, t)]); - - movaps(xmm1, xmm0); - mulps(xmm1, xmm3); - - if (m_sel.fst) - { - // m_local.d4.stq = GSVector4i(t * 4.0f); - - cvttps2dq(xmm1, xmm1); - - movdqa(ptr[&m_local.d4.stq], xmm1); - } - else - { - // m_local.d4.stq = t * 4.0f; - - movaps(ptr[&m_local.d4.stq], xmm1); - } - - for (int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++) - { - // GSVector4 ds = t.xxxx(); - // GSVector4 dt = t.yyyy(); - // GSVector4 dq = t.zzzz(); - - movaps(xmm1, xmm0); - shufps(xmm1, xmm1, (uint8)_MM_SHUFFLE(j, j, j, j)); - - for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) - { - // GSVector4 v = ds/dt * m_shift[i]; - - movaps(xmm2, xmm1); - mulps(xmm2, Xmm(4 + i)); - - if (m_sel.fst) - { - // m_local.d[i].s/t = GSVector4i(v); - - cvttps2dq(xmm2, xmm2); - - switch (j) - { - case 0: movdqa(ptr[&m_local.d[i].s], xmm2); break; - case 1: movdqa(ptr[&m_local.d[i].t], xmm2); break; - } - } - else - { - // m_local.d[i].s/t/q = v; - - switch (j) - { - case 0: movaps(ptr[&m_local.d[i].s], xmm2); break; - case 1: movaps(ptr[&m_local.d[i].t], xmm2); break; - case 2: movaps(ptr[&m_local.d[i].q], xmm2); break; - } - } - } - } -} - -void GSSetupPrimCodeGenerator::Color_SSE() -{ - if (!m_en.c) - { - return; - } - - if (m_sel.iip) - { - // GSVector4 c = dscan.c; - - movaps(xmm0, ptr[edx + offsetof(GSVertexSW, c)]); - movaps(xmm1, xmm0); - - // m_local.d4.c = GSVector4i(c * 4.0f).xzyw().ps32(); - - movaps(xmm2, xmm0); - mulps(xmm2, xmm3); - cvttps2dq(xmm2, xmm2); - pshufd(xmm2, xmm2, _MM_SHUFFLE(3, 1, 2, 0)); - packssdw(xmm2, xmm2); - movdqa(ptr[&m_local.d4.c], xmm2); - - // xmm3 is not needed anymore - - // GSVector4 dr = c.xxxx(); - // GSVector4 db = c.zzzz(); - - shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - - for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) - { - // GSVector4i r = GSVector4i(dr * m_shift[i]).ps32(); - - movaps(xmm2, xmm0); - mulps(xmm2, Xmm(4 + i)); - cvttps2dq(xmm2, xmm2); - packssdw(xmm2, xmm2); - - // GSVector4i b = GSVector4i(db * m_shift[i]).ps32(); - - movaps(xmm3, xmm1); - mulps(xmm3, Xmm(4 + i)); - cvttps2dq(xmm3, xmm3); - packssdw(xmm3, xmm3); - - // m_local.d[i].rb = r.upl16(b); - - punpcklwd(xmm2, xmm3); - movdqa(ptr[&m_local.d[i].rb], xmm2); - } - - // GSVector4 c = dscan.c; - - movaps(xmm0, ptr[edx + offsetof(GSVertexSW, c)]); // not enough regs, have to reload it - movaps(xmm1, xmm0); - - // GSVector4 dg = c.yyyy(); - // GSVector4 da = c.wwww(); - - shufps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); - shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); - - for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) - { - // GSVector4i g = GSVector4i(dg * m_shift[i]).ps32(); - - movaps(xmm2, xmm0); - mulps(xmm2, Xmm(4 + i)); - cvttps2dq(xmm2, xmm2); - packssdw(xmm2, xmm2); - - // GSVector4i a = GSVector4i(da * m_shift[i]).ps32(); - - movaps(xmm3, xmm1); - mulps(xmm3, Xmm(4 + i)); - cvttps2dq(xmm3, xmm3); - packssdw(xmm3, xmm3); - - // m_local.d[i].ga = g.upl16(a); - - punpcklwd(xmm2, xmm3); - movdqa(ptr[&m_local.d[i].ga], xmm2); - } - } - else - { - // GSVector4i c = GSVector4i(vertex[index[last].c); - - int last = 0; - - switch (m_sel.prim) - { - case GS_POINT_CLASS: last = 0; break; - case GS_LINE_CLASS: last = 1; break; - case GS_TRIANGLE_CLASS: last = 2; break; - case GS_SPRITE_CLASS: last = 1; break; - } - - if (!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth() - { - mov(ecx, ptr[esp + _index]); - mov(ecx, ptr[ecx + sizeof(uint32) * last]); - shl(ecx, 6); // * sizeof(GSVertexSW) - add(ecx, ptr[esp + _vertex]); - } - - cvttps2dq(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]); - - // c = c.upl16(c.zwxy()); - - pshufd(xmm1, xmm0, _MM_SHUFFLE(1, 0, 3, 2)); - punpcklwd(xmm0, xmm1); - - // if(!tme) c = c.srl16(7); - - if (m_sel.tfx == TFX_NONE) - { - psrlw(xmm0, 7); - } - - // m_local.c.rb = c.xxxx(); - // m_local.c.ga = c.zzzz(); - - pshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - pshufd(xmm2, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - - movdqa(ptr[&m_local.c.rb], xmm1); - movdqa(ptr[&m_local.c.ga], xmm2); - } -} - -#endif diff --git a/pcsx2/pcsx2.vcxproj b/pcsx2/pcsx2.vcxproj index 64131f397b..2c787b648a 100644 --- a/pcsx2/pcsx2.vcxproj +++ b/pcsx2/pcsx2.vcxproj @@ -492,12 +492,6 @@ - - - - - - diff --git a/pcsx2/pcsx2.vcxproj.filters b/pcsx2/pcsx2.vcxproj.filters index 87288df283..f734cc6721 100644 --- a/pcsx2/pcsx2.vcxproj.filters +++ b/pcsx2/pcsx2.vcxproj.filters @@ -1544,27 +1544,6 @@ System\Ps2\GS\Renderers\Software - - System\Ps2\GS\Renderers\Software - - - System\Ps2\GS\Renderers\Software - - - System\Ps2\GS\Renderers\Software - - - System\Ps2\GS\Renderers\Software - - - System\Ps2\GS\Renderers\Software - - - System\Ps2\GS\Renderers\Software - - - System\Ps2\GS\Renderers\Software - System\Ps2\GS\Renderers\Software