mirror of https://github.com/PCSX2/pcsx2.git
GS: Use MultiISA ProcessorFeatures struct for SW codegen
This commit is contained in:
parent
75da9809e9
commit
a45f674bc1
|
@ -20,8 +20,6 @@
|
||||||
#include "GS/Renderers/SW/GSScanlineEnvironment.h"
|
#include "GS/Renderers/SW/GSScanlineEnvironment.h"
|
||||||
#include "common/emitter/tools.h"
|
#include "common/emitter/tools.h"
|
||||||
|
|
||||||
#include <xbyak/xbyak_util.h>
|
|
||||||
|
|
||||||
template <class KEY, class VALUE>
|
template <class KEY, class VALUE>
|
||||||
class GSFunctionMap
|
class GSFunctionMap
|
||||||
{
|
{
|
||||||
|
@ -144,18 +142,6 @@ public:
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class GSCodeGenerator : public Xbyak::CodeGenerator
|
|
||||||
{
|
|
||||||
protected:
|
|
||||||
Xbyak::util::Cpu m_cpu;
|
|
||||||
|
|
||||||
public:
|
|
||||||
GSCodeGenerator(void* code, size_t maxsize)
|
|
||||||
: Xbyak::CodeGenerator(maxsize, code)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template <class CG, class KEY, class VALUE>
|
template <class CG, class KEY, class VALUE>
|
||||||
class GSCodeGeneratorFunctionMap : public GSFunctionMap<KEY, VALUE>
|
class GSCodeGeneratorFunctionMap : public GSFunctionMap<KEY, VALUE>
|
||||||
{
|
{
|
||||||
|
|
|
@ -81,7 +81,7 @@ using namespace Xbyak;
|
||||||
#define _rip_local_d_p(x) _rip_local_d(x)
|
#define _rip_local_d_p(x) _rip_local_d(x)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
GSDrawScanlineCodeGenerator2::GSDrawScanlineCodeGenerator2(Xbyak::CodeGenerator* base, CPUInfo cpu, void* param, u64 key)
|
GSDrawScanlineCodeGenerator2::GSDrawScanlineCodeGenerator2(Xbyak::CodeGenerator* base, const ProcessorFeatures& cpu, void* param, u64 key)
|
||||||
: _parent(base, cpu)
|
: _parent(base, cpu)
|
||||||
, m_local(*(GSScanlineLocalData*)param)
|
, m_local(*(GSScanlineLocalData*)param)
|
||||||
, m_rip(false)
|
, m_rip(false)
|
||||||
|
@ -243,7 +243,7 @@ void GSDrawScanlineCodeGenerator2::alltrue(const XYm& test)
|
||||||
u32 mask = test.isYMM() ? 0xffffffff : 0xffff;
|
u32 mask = test.isYMM() ? 0xffffffff : 0xffff;
|
||||||
pmovmskb(eax, test);
|
pmovmskb(eax, test);
|
||||||
cmp(eax, mask);
|
cmp(eax, mask);
|
||||||
je("step", GSCodeGenerator::T_NEAR);
|
je("step", Xbyak::CodeGenerator::T_NEAR);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDrawScanlineCodeGenerator2::blend(const XYm& a, const XYm& b, const XYm& mask)
|
void GSDrawScanlineCodeGenerator2::blend(const XYm& a, const XYm& b, const XYm& mask)
|
||||||
|
|
|
@ -94,7 +94,7 @@ class GSDrawScanlineCodeGenerator2 : public GSNewCodeGenerator
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSDrawScanlineCodeGenerator2(Xbyak::CodeGenerator* base, CPUInfo cpu, void* param, u64 key);
|
GSDrawScanlineCodeGenerator2(Xbyak::CodeGenerator* base, const ProcessorFeatures& cpu, void* param, u64 key);
|
||||||
void Generate();
|
void Generate();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
|
@ -82,7 +82,7 @@ static bool shouldUseCDrawScanline(u64 key)
|
||||||
}
|
}
|
||||||
|
|
||||||
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, u64 key, void* code, size_t maxsize)
|
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, u64 key, void* code, size_t maxsize)
|
||||||
: GSCodeGenerator(code, maxsize)
|
: Xbyak::CodeGenerator(maxsize, code)
|
||||||
, m_local(*(GSScanlineLocalData*)param)
|
, m_local(*(GSScanlineLocalData*)param)
|
||||||
, m_rip(false)
|
, m_rip(false)
|
||||||
{
|
{
|
||||||
|
@ -108,5 +108,5 @@ GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, u64 key, v
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
GSDrawScanlineCodeGenerator2(this, CPUInfo(m_cpu), (void*)&m_local, m_sel.key).Generate();
|
GSDrawScanlineCodeGenerator2(this, g_cpu, (void*)&m_local, m_sel.key).Generate();
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
#include "GS/Renderers/Common/GSFunctionMap.h"
|
#include "GS/Renderers/Common/GSFunctionMap.h"
|
||||||
#include "GS/GSUtil.h"
|
#include "GS/GSUtil.h"
|
||||||
#include "GS/MultiISA.h"
|
#include "GS/MultiISA.h"
|
||||||
|
#include <xbyak/xbyak.h>
|
||||||
|
|
||||||
#if defined(_M_AMD64) || defined(_WIN64)
|
#if defined(_M_AMD64) || defined(_WIN64)
|
||||||
#define RegLong Xbyak::Reg64
|
#define RegLong Xbyak::Reg64
|
||||||
|
@ -28,7 +29,7 @@
|
||||||
|
|
||||||
MULTI_ISA_UNSHARED_START
|
MULTI_ISA_UNSHARED_START
|
||||||
|
|
||||||
class GSDrawScanlineCodeGenerator : public GSCodeGenerator
|
class GSDrawScanlineCodeGenerator : public Xbyak::CodeGenerator
|
||||||
{
|
{
|
||||||
void operator=(const GSDrawScanlineCodeGenerator&);
|
void operator=(const GSDrawScanlineCodeGenerator&);
|
||||||
|
|
||||||
|
|
|
@ -17,36 +17,7 @@
|
||||||
|
|
||||||
#include "xbyak/xbyak.h"
|
#include "xbyak/xbyak.h"
|
||||||
#include "xbyak/xbyak_util.h"
|
#include "xbyak/xbyak_util.h"
|
||||||
|
#include "GS/MultiISA.h"
|
||||||
namespace SSEVersion
|
|
||||||
{
|
|
||||||
enum SSEVersion
|
|
||||||
{
|
|
||||||
AVX2 = 0x501,
|
|
||||||
AVX = 0x500,
|
|
||||||
SSE41 = 0x401,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Similar to Xbyak::util::Cpu but more open to us putting in extra flags (e.g. "vpgatherdd is fast"), as well as making it easier to test other configurations by artificially limiting features
|
|
||||||
struct CPUInfo
|
|
||||||
{
|
|
||||||
bool hasFMA = false;
|
|
||||||
SSEVersion::SSEVersion sseVersion = SSEVersion::SSE41;
|
|
||||||
|
|
||||||
CPUInfo() = default;
|
|
||||||
CPUInfo(const Xbyak::util::Cpu& cpu)
|
|
||||||
{
|
|
||||||
auto version = SSEVersion::SSE41;
|
|
||||||
if (cpu.has(cpu.tAVX))
|
|
||||||
version = SSEVersion::AVX;
|
|
||||||
if (cpu.has(cpu.tAVX2))
|
|
||||||
version = SSEVersion::AVX2;
|
|
||||||
|
|
||||||
hasFMA = cpu.has(cpu.tFMA);
|
|
||||||
sseVersion = version;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Code generator that automatically selects between SSE and AVX, x86 and x64 so you don't have to
|
/// Code generator that automatically selects between SSE and AVX, x86 and x64 so you don't have to
|
||||||
/// Should make combined SSE and AVX codegen much easier
|
/// Should make combined SSE and AVX codegen much easier
|
||||||
|
@ -130,10 +101,10 @@ public:
|
||||||
const RipType rip{};
|
const RipType rip{};
|
||||||
const Xbyak::AddressFrame ptr{0}, byte{8}, word{16}, dword{32}, qword{64}, xword{128}, yword{256}, zword{512};
|
const Xbyak::AddressFrame ptr{0}, byte{8}, word{16}, dword{32}, qword{64}, xword{128}, yword{256}, zword{512};
|
||||||
|
|
||||||
GSNewCodeGenerator(Xbyak::CodeGenerator* actual, CPUInfo cpu)
|
GSNewCodeGenerator(Xbyak::CodeGenerator* actual, const ProcessorFeatures& cpu)
|
||||||
: actual(*actual)
|
: actual(*actual)
|
||||||
, hasAVX(cpu.sseVersion >= SSEVersion::AVX)
|
, hasAVX(cpu.vectorISA >= ProcessorFeatures::VectorISA::AVX)
|
||||||
, hasAVX2(cpu.sseVersion >= SSEVersion::AVX2)
|
, hasAVX2(cpu.vectorISA >= ProcessorFeatures::VectorISA::AVX2)
|
||||||
, hasFMA(cpu.hasFMA)
|
, hasFMA(cpu.hasFMA)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
|
@ -48,7 +48,7 @@ using namespace Xbyak;
|
||||||
#define _rip_local_d_p(x) _rip_local_d(x)
|
#define _rip_local_d_p(x) _rip_local_d(x)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
GSSetupPrimCodeGenerator2::GSSetupPrimCodeGenerator2(Xbyak::CodeGenerator* base, CPUInfo cpu, void* param, u64 key)
|
GSSetupPrimCodeGenerator2::GSSetupPrimCodeGenerator2(Xbyak::CodeGenerator* base, const ProcessorFeatures& cpu, void* param, u64 key)
|
||||||
: _parent(base, cpu)
|
: _parent(base, cpu)
|
||||||
, m_local(*(GSScanlineLocalData*)param)
|
, m_local(*(GSScanlineLocalData*)param)
|
||||||
, m_rip(false), many_regs(false)
|
, m_rip(false), many_regs(false)
|
||||||
|
|
|
@ -66,7 +66,7 @@ class GSSetupPrimCodeGenerator2 : public GSNewCodeGenerator
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSSetupPrimCodeGenerator2(Xbyak::CodeGenerator* base, CPUInfo cpu, void* param, u64 key);
|
GSSetupPrimCodeGenerator2(Xbyak::CodeGenerator* base, const ProcessorFeatures& cpu, void* param, u64 key);
|
||||||
void Generate();
|
void Generate();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
MULTI_ISA_UNSHARED_IMPL;
|
MULTI_ISA_UNSHARED_IMPL;
|
||||||
|
|
||||||
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, u64 key, void* code, size_t maxsize)
|
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, u64 key, void* code, size_t maxsize)
|
||||||
: GSCodeGenerator(code, maxsize)
|
: Xbyak::CodeGenerator(maxsize, code)
|
||||||
, m_local(*(GSScanlineLocalData*)param)
|
, m_local(*(GSScanlineLocalData*)param)
|
||||||
, m_rip(false)
|
, m_rip(false)
|
||||||
{
|
{
|
||||||
|
@ -31,5 +31,5 @@ GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, u64 key, void* c
|
||||||
m_en.t = m_sel.fb && m_sel.tfx != TFX_NONE ? 1 : 0;
|
m_en.t = m_sel.fb && m_sel.tfx != TFX_NONE ? 1 : 0;
|
||||||
m_en.c = m_sel.fb && !(m_sel.tfx == TFX_DECAL && m_sel.tcc) ? 1 : 0;
|
m_en.c = m_sel.fb && !(m_sel.tfx == TFX_DECAL && m_sel.tcc) ? 1 : 0;
|
||||||
|
|
||||||
GSSetupPrimCodeGenerator2(this, CPUInfo(m_cpu), param, key).Generate();
|
GSSetupPrimCodeGenerator2(this, g_cpu, param, key).Generate();
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,13 +16,13 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "GSScanlineEnvironment.h"
|
#include "GSScanlineEnvironment.h"
|
||||||
#include "GS/Renderers/Common/GSFunctionMap.h"
|
|
||||||
#include "GS/GSUtil.h"
|
#include "GS/GSUtil.h"
|
||||||
#include "GS/MultiISA.h"
|
#include "GS/MultiISA.h"
|
||||||
|
#include <xbyak/xbyak.h>
|
||||||
|
|
||||||
MULTI_ISA_UNSHARED_START
|
MULTI_ISA_UNSHARED_START
|
||||||
|
|
||||||
class GSSetupPrimCodeGenerator : public GSCodeGenerator
|
class GSSetupPrimCodeGenerator : public Xbyak::CodeGenerator
|
||||||
{
|
{
|
||||||
void operator=(const GSSetupPrimCodeGenerator&);
|
void operator=(const GSSetupPrimCodeGenerator&);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue