GS: Replace 6 DrawScanline code generators with one merged one

This commit is contained in:
TellowKrinkle 2021-08-28 00:32:20 -05:00 committed by refractionpcsx2
parent 805b647c73
commit f55219bb1b
6 changed files with 3709 additions and 1 deletions

View File

@ -639,6 +639,7 @@ set(pcsx2GSSources
GS/Renderers/HW/GSTextureCache.cpp GS/Renderers/HW/GSTextureCache.cpp
GS/Renderers/SW/GSDrawScanline.cpp GS/Renderers/SW/GSDrawScanline.cpp
GS/Renderers/SW/GSDrawScanlineCodeGenerator.cpp GS/Renderers/SW/GSDrawScanlineCodeGenerator.cpp
GS/Renderers/SW/GSDrawScanlineCodeGenerator.all.cpp
GS/Renderers/SW/GSDrawScanlineCodeGenerator.x64.cpp GS/Renderers/SW/GSDrawScanlineCodeGenerator.x64.cpp
GS/Renderers/SW/GSDrawScanlineCodeGenerator.x64.avx.cpp GS/Renderers/SW/GSDrawScanlineCodeGenerator.x64.avx.cpp
GS/Renderers/SW/GSDrawScanlineCodeGenerator.x64.avx2.cpp GS/Renderers/SW/GSDrawScanlineCodeGenerator.x64.avx2.cpp
@ -708,6 +709,7 @@ set(pcsx2GSHeaders
GS/Renderers/HW/GSTextureCache.h GS/Renderers/HW/GSTextureCache.h
GS/Renderers/HW/GSVertexHW.h GS/Renderers/HW/GSVertexHW.h
GS/Renderers/SW/GSDrawScanlineCodeGenerator.h GS/Renderers/SW/GSDrawScanlineCodeGenerator.h
GS/Renderers/SW/GSDrawScanlineCodeGenerator.all.h
GS/Renderers/SW/GSDrawScanline.h GS/Renderers/SW/GSDrawScanline.h
GS/Renderers/SW/GSNewCodeGenerator.h GS/Renderers/SW/GSNewCodeGenerator.h
GS/Renderers/SW/GSRasterizer.h GS/Renderers/SW/GSRasterizer.h

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,189 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2021 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "GSScanlineEnvironment.h"
#include "GSNewCodeGenerator.h"
// `_t` is defined as a macro by wx, and the class below declares a member named `_t`,
// so the macro has to be removed before the class is parsed.
// Conflict with wx, hopefully no one needs this
#undef _t
// Compile-time selection of the vector register type the generator works with.
// Builds with _M_SSE >= 0x501 use 256-bit Ymm registers; all other builds use 128-bit Xmm.
// NOTE(review): 0x501 is presumably the AVX2 target level — confirm against the build configuration.
// Exactly one of DRAW_SCANLINE_USING_XMM / DRAW_SCANLINE_USING_YMM is 1, so they can be
// used in `#if` where the `isXmm` / `isYmm` class constants cannot.
#if _M_SSE >= 0x501
#define DRAW_SCANLINE_VECTOR_REGISTER Xbyak::Ymm
#define DRAW_SCANLINE_USING_XMM 0
#define DRAW_SCANLINE_USING_YMM 1
#else
#define DRAW_SCANLINE_VECTOR_REGISTER Xbyak::Xmm
#define DRAW_SCANLINE_USING_XMM 1
#define DRAW_SCANLINE_USING_YMM 0
#endif
/// Merged code generator for the software renderer's DrawScanline routine.
///
/// The vector width is fixed at compile time through DRAW_SCANLINE_VECTOR_REGISTER, so the
/// same class declaration serves both the Xmm and the Ymm builds.
///
/// Only declarations live in this header; the member functions are defined in a separate
/// translation unit. Data-member declaration order determines constructor initialization
/// order, so do not reorder the members without checking the constructor.
class GSDrawScanlineCodeGenerator2 : public GSNewCodeGenerator
{
using _parent = GSNewCodeGenerator;
/// The vector register type used throughout: Xbyak::Xmm or Xbyak::Ymm depending on the build
using XYm = DRAW_SCANLINE_VECTOR_REGISTER;
/// On x86-64 we reserve a bunch of GPRs for holding addresses of locals that would otherwise be hard to reach
/// On x86-32 the same values are just raw 32-bit addresses
using LocalAddr = Choose3264<size_t, AddressReg>::type;
/// Exactly one of these is true; they mirror DRAW_SCANLINE_USING_XMM / DRAW_SCANLINE_USING_YMM
constexpr static bool isXmm = std::is_same<XYm, Xbyak::Xmm>::value;
constexpr static bool isYmm = std::is_same<XYm, Xbyak::Ymm>::value;
/// Size in bytes of a general-purpose register / pointer on the target
constexpr static int wordsize = is64 ? 8 : 4;
/// Size in bytes of one XYm register
constexpr static int vecsize = isXmm ? 16 : 32;
/// log2(vecsize)
constexpr static int vecsizelog = isXmm ? 4 : 5;
/// Number of 4-byte elements that fit in one XYm register
constexpr static int vecints = vecsize / 4;
// MARK: - Constants
// Stack offsets. The `_32_` values apply to 32-bit builds, the `_64_` values to 64-bit builds.
constexpr static int _32_args = 16;
/// Sentinel for an offset that does not exist on the current target (see `_v` on 64-bit)
constexpr static int _invalid = 0xaaaaaaaa;
#ifdef _WIN32
constexpr static int _64_top = 8 * 0;
// XMM registers will be saved to `rsp + _64_win_xmm_start + id - 6`
// Which will put xmm6 after the temporaries, then xmm7, etc
// NOTE(review): each saved register gets a 16-byte slot (see the 16 * 10 below), so the
// offset is presumably `_64_win_xmm_start + (id - 6) * 16` — confirm against the save/restore code
constexpr static int _64_win_xmm_start = 8 * 2;
// Windows has no redzone and also has 10 xmm registers to save
constexpr static int _64_win_stack_size = _64_win_xmm_start + 16 * 10;
#else
// System-V has a redzone so stick everything there
// Offsets are negative, i.e. below the stack pointer, one 8-byte slot per saved GPR
constexpr static int _64_rz_rbx = -8 * 1;
constexpr static int _64_rz_r12 = -8 * 2;
constexpr static int _64_rz_r13 = -8 * 3;
constexpr static int _64_rz_r14 = -8 * 4;
constexpr static int _64_rz_r15 = -8 * 5;
constexpr static int _64_top = -8 * 6;
#endif
/// Offset of `top`: the platform-specific `_64_top` on 64-bit, an offset past `_32_args` on 32-bit
constexpr static int _top = is64 ? _64_top : _32_args + 4;
/// Offset of `v`: only meaningful on 32-bit; on 64-bit it is set to the `_invalid` sentinel
constexpr static int _v = is64 ? _invalid : _32_args + 8;
/// Selector describing which variant of the routine to generate
GSScanlineSelector m_sel;
/// Local data block addressed by the generated code (held by reference, not owned)
GSScanlineLocalData& m_local;
bool m_rip;
bool use_lod;
/// Vector registers 0-15 typed as XYm
const XYm xym0{0}, xym1{1}, xym2{2}, xym3{3}, xym4{4}, xym5{5}, xym6{6}, xym7{7}, xym8{8}, xym9{9}, xym10{10}, xym11{11}, xym12{12}, xym13{13}, xym14{14}, xym15{15};
/// Note: a2 and t3 are only available on x86-64
/// Outside of Init, usable registers are a0, t0, t1, t2, t3[x64], rax, rbx, rdx, r10+
const AddressReg a0, a1, a2, a3, t0, t1, t2, t3;
/// Addresses of frequently used locals: reserved registers on x86-64, raw addresses on x86-32 (see LocalAddr)
const LocalAddr _g_const, _m_local, _m_local__gd, _m_local__gd__vm;
/// Available on both x86 and x64, not always valid
const XYm _rb, _ga, _fm, _zm, _fd, _test;
/// Always valid if needed, x64 only
const XYm _z, _f, _s, _t, _q, _f_rb, _f_ga;
/// Returns the first arg on 32-bit, second on 64-bit
static LocalAddr chooseLocal(const void* addr32, AddressReg reg64)
{
// The pointer is converted to an integer so both alternatives fit choose3264's argument types
return choose3264((size_t)addr32, reg64);
}
public:
/// @param base  code generator the instructions are emitted into
/// @param cpu   CPU feature information
/// @param param NOTE(review): the caller in this change passes the address of its
///              GSScanlineLocalData here — presumably it is bound to `m_local`; confirm in the .cpp
/// @param key   NOTE(review): the caller in this change passes `m_sel.key` — presumably
///              used to populate `m_sel`; confirm in the .cpp
GSDrawScanlineCodeGenerator2(Xbyak::CodeGenerator* base, CPUInfo cpu, void* param, uint64 key);
/// Entry point: emits the routine for the selector given at construction
void Generate();
private:
/// Loads the given address into the given register if needed, and returns something that can be used in a `ptr[]`
LocalAddr loadAddress(AddressReg reg, const void* addr);
/// Broadcast 128 bits of floats from memory to the whole register, whatever size that register might be
void broadcastf128(const XYm& reg, const Xbyak::Address& mem);
/// Broadcast 128 bits of integers from memory to the whole register, whatever size that register might be
void broadcasti128(const XYm& reg, const Xbyak::Address& mem);
/// Broadcast a floating-point variable stored in GSScanlineLocalData to the whole register
/// On YMM registers this will be a broadcast from a 32-bit value
/// On XMM registers this will be a load of a full 128-bit value, with the broadcast happening before storing to the local data
void broadcastssLocal(const XYm& reg, const Xbyak::Address& mem);
/// Broadcast a qword variable stored in GSScanlineLocalData to the whole register
/// On YMM registers this will be a broadcast from a 64-bit value
/// On XMM registers this will be a load of a full 128-bit value, with the broadcast happening before storing to the local data
void pbroadcastqLocal(const XYm& reg, const Xbyak::Address& mem);
/// Broadcast a dword variable stored in GSScanlineLocalData to the whole register
/// On YMM registers this will be a broadcast from a 32-bit value
/// On XMM registers this will be a load of a full 128-bit value, with the broadcast happening before storing to the local data
void pbroadcastdLocal(const XYm& reg, const Xbyak::Address& mem);
/// Broadcast a word variable stored in GSScanlineLocalData to the whole register
/// On YMM registers this will be a broadcast from a 16-bit value
/// On XMM registers this will be a load of a full 128-bit value, with the broadcast happening before storing to the local data
void pbroadcastwLocal(const XYm& reg, const Xbyak::Address& mem);
/// Broadcast a 32-bit GPR to a vector register
void broadcastGPRToVec(const XYm& vec, const Xbyak::Reg32& gpr);
// MARK: - Helpers operating on XYm registers
// Definitions are not visible from this header; see the .cpp for exact semantics.
void modulate16(const XYm& a, const Xbyak::Operand& f, uint8 shift);
void lerp16(const XYm& a, const XYm& b, const XYm& f, uint8 shift);
void lerp16_4(const XYm& a, const XYm& b, const XYm& f);
void mix16(const XYm& a, const XYm& b, const XYm& temp);
void clamp16(const XYm& a, const XYm& temp);
void alltrue(const XYm& test);
void blend(const XYm& a, const XYm& b, const XYm& mask);
void blendr(const XYm& b, const XYm& a, const XYm& mask);
void blend8(const XYm& a, const XYm& b);
void blend8r(const XYm& b, const XYm& a);
void split16_2x8(const XYm& l, const XYm& h, const XYm& src);
// MARK: - Stages of the generated routine
// NOTE(review): the names suggest the individual stages of the scanline routine; the order
// in which they are emitted is decided by Generate() in the .cpp — confirm there.
void Init();
void Step();
void TestZ(const XYm& temp1, const XYm& temp2);
void SampleTexture();
void SampleTexture_TexelReadHelper(int mip_offset);
void Wrap(const XYm& uv);
void Wrap(const XYm& uv0, const XYm& uv1);
void SampleTextureLOD();
void WrapLOD(const XYm& uv);
void WrapLOD(const XYm& uv0, const XYm& uv1);
void AlphaTFX();
void ReadMask();
void TestAlpha();
void ColorTFX();
void Fog();
void ReadFrame();
void TestDestAlpha();
void WriteMask();
void WriteZBuf();
void AlphaBlend();
void WriteFrame();
// MARK: - Pixel and texel access
void ReadPixel(const XYm& dst, const XYm& tmp, const AddressReg& addr);
// The mask register is 8 bits wide in the Xmm build and 32 bits wide in the Ymm build
#if DRAW_SCANLINE_USING_XMM
void WritePixel(const XYm& src_, const AddressReg& addr, const Xbyak::Reg8& mask, bool fast, int psm, int fz);
#else
void WritePixel(const XYm& src_, const AddressReg& addr, const Xbyak::Reg32& mask, bool fast, int psm, int fz);
#endif
/// Overload that always takes an Xmm source, regardless of the build's vector width
void WritePixel(const Xmm& src, const AddressReg& addr, uint8 i, uint8 j, int psm);
void ReadTexel1(const XYm& dst, const XYm& src, const XYm& tmp1, const XYm& tmp2, int mip_offset);
// NOTE(review): the `d2s0` / `d3s1` parameter names suggest registers that act as a source
// on entry and a destination on exit — confirm against the definitions.
void ReadTexel4(
const XYm& d0, const XYm& d1,
const XYm& d2s0, const XYm& d3s1,
const XYm& s2, const XYm& s3,
const XYm& tmp1, const XYm& tmp2,
int mip_offset);
void ReadTexelImpl(
const XYm& d0, const XYm& d1,
const XYm& d2s0, const XYm& d3s1,
const XYm& s2, const XYm& s3,
const XYm& tmp1, const XYm& tmp2,
int pixels, int mip_offset);
void ReadTexelImplLoadTexLOD(int lod, int mip_offset);
/// Variant declared on Ymm registers explicitly (single temporary)
void ReadTexelImplYmm(
const Ymm& d0, const Ymm& d1,
const Ymm& d2s0, const Ymm& d3s1,
const Ymm& s2, const Ymm& s3,
const Ymm& tmp,
int pixels, int mip_offset);
/// Variant declared on Xmm registers explicitly (no temporaries)
void ReadTexelImplSSE4(
const Xmm& d0, const Xmm& d1,
const Xmm& d2s0, const Xmm& d3s1,
const Xmm& s2, const Xmm& s3,
int pixels, int mip_offset);
/// Single-register overload, always operating on Xmm registers
void ReadTexelImpl(const Xmm& dst, const Xmm& addr, uint8 i, bool texInA3, bool preserveDst);
};

View File

@ -15,6 +15,7 @@
#include "PrecompiledHeader.h" #include "PrecompiledHeader.h"
#include "GSDrawScanlineCodeGenerator.h" #include "GSDrawScanlineCodeGenerator.h"
#include "GSDrawScanlineCodeGenerator.all.h"
#if _M_SSE >= 0x501 #if _M_SSE >= 0x501
#else #else
@ -37,7 +38,7 @@ GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, uint64 key
if (m_sel.breakpoint) if (m_sel.breakpoint)
db(0xCC); db(0xCC);
Generate(); GSDrawScanlineCodeGenerator2(this, CPUInfo(m_cpu), (void*)&m_local, m_sel.key).Generate();
} }
void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f, uint8 shift) void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f, uint8 shift)

View File

@ -466,6 +466,7 @@
<ClCompile Include="GS\GSDrawingContext.cpp" /> <ClCompile Include="GS\GSDrawingContext.cpp" />
<ClCompile Include="GS\Renderers\SW\GSDrawScanline.cpp" /> <ClCompile Include="GS\Renderers\SW\GSDrawScanline.cpp" />
<ClCompile Include="GS\Renderers\SW\GSDrawScanlineCodeGenerator.cpp" /> <ClCompile Include="GS\Renderers\SW\GSDrawScanlineCodeGenerator.cpp" />
<ClCompile Include="GS\Renderers\SW\GSDrawScanlineCodeGenerator.all.cpp" />
<ClCompile Include="GS\Renderers\SW\GSDrawScanlineCodeGenerator.x64.avx.cpp" /> <ClCompile Include="GS\Renderers\SW\GSDrawScanlineCodeGenerator.x64.avx.cpp" />
<ClCompile Include="GS\Renderers\SW\GSDrawScanlineCodeGenerator.x64.avx2.cpp" /> <ClCompile Include="GS\Renderers\SW\GSDrawScanlineCodeGenerator.x64.avx2.cpp" />
<ClCompile Include="GS\Renderers\SW\GSDrawScanlineCodeGenerator.x64.cpp" /> <ClCompile Include="GS\Renderers\SW\GSDrawScanlineCodeGenerator.x64.cpp" />
@ -830,6 +831,7 @@
<ClInclude Include="GS\GSDrawingEnvironment.h" /> <ClInclude Include="GS\GSDrawingEnvironment.h" />
<ClInclude Include="GS\Renderers\SW\GSDrawScanline.h" /> <ClInclude Include="GS\Renderers\SW\GSDrawScanline.h" />
<ClInclude Include="GS\Renderers\SW\GSDrawScanlineCodeGenerator.h" /> <ClInclude Include="GS\Renderers\SW\GSDrawScanlineCodeGenerator.h" />
<ClInclude Include="GS\Renderers\SW\GSDrawScanlineCodeGenerator.all.h" />
<ClInclude Include="GS\Renderers\SW\GSNewCodeGenerator.h" /> <ClInclude Include="GS\Renderers\SW\GSNewCodeGenerator.h" />
<ClInclude Include="GS\GSDump.h" /> <ClInclude Include="GS\GSDump.h" />
<ClInclude Include="GS\Renderers\Common\GSFastList.h" /> <ClInclude Include="GS\Renderers\Common\GSFastList.h" />

View File

@ -1517,6 +1517,9 @@
<ClCompile Include="GS\Renderers\SW\GSDrawScanlineCodeGenerator.cpp"> <ClCompile Include="GS\Renderers\SW\GSDrawScanlineCodeGenerator.cpp">
<Filter>System\Ps2\GS\Renderers\Software</Filter> <Filter>System\Ps2\GS\Renderers\Software</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="GS\Renderers\SW\GSDrawScanlineCodeGenerator.all.cpp">
<Filter>System\Ps2\GS\Renderers\Software</Filter>
</ClCompile>
<ClCompile Include="GS\Renderers\SW\GSDrawScanlineCodeGenerator.x64.avx.cpp"> <ClCompile Include="GS\Renderers\SW\GSDrawScanlineCodeGenerator.x64.avx.cpp">
<Filter>System\Ps2\GS\Renderers\Software</Filter> <Filter>System\Ps2\GS\Renderers\Software</Filter>
</ClCompile> </ClCompile>
@ -2616,6 +2619,9 @@
<ClInclude Include="GS\Renderers\SW\GSDrawScanlineCodeGenerator.h"> <ClInclude Include="GS\Renderers\SW\GSDrawScanlineCodeGenerator.h">
<Filter>System\Ps2\GS\Renderers\Software</Filter> <Filter>System\Ps2\GS\Renderers\Software</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="GS\Renderers\SW\GSDrawScanlineCodeGenerator.all.h">
<Filter>System\Ps2\GS\Renderers\Software</Filter>
</ClInclude>
<ClInclude Include="GS\Renderers\SW\GSNewCodeGenerator.h"> <ClInclude Include="GS\Renderers\SW\GSNewCodeGenerator.h">
<Filter>System\Ps2\GS\Renderers\Software</Filter> <Filter>System\Ps2\GS\Renderers\Software</Filter>
</ClInclude> </ClInclude>